From e53ac5b2a65f79d6342b1820c68efc126c8e4986 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 8 Nov 2015 21:27:40 +0100 Subject: MemStruct: minor fixes + toy example script --- example/jitter/memstruct.py | 229 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 example/jitter/memstruct.py (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py new file mode 100644 index 00000000..f2e9f8dc --- /dev/null +++ b/example/jitter/memstruct.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python +"""This script is just a short example of common usages for miasm2.analysis.mem. +For a more complete view of what is possible, tests/analysis/mem.py covers +most of the module possibilities, and the module doc gives useful information +as well. +""" + +from miasm2.analysis.machine import Machine +from miasm2.analysis.mem import MemStruct, MemSelf, MemVoid, MemStr,\ + Ptr, Num, Array, set_allocator +from miasm2.os_dep.common import heap + +# Instanciate a heap +my_heap = heap() +# And set it as the default memory allocator, to avoid manual allocation and +# explicit address passing to the MemStruct constructor +set_allocator(my_heap.vm_alloc) + +# Let's reimplement a simple C generic linked list mapped on a VmMngr! 
+ +# All the structures and methods will use the python objects but all the data +# is in fact stored in the VmMngr + +class ListNode(MemStruct): + fields = [ + # The " Date: Tue, 24 Nov 2015 19:14:23 +0100 Subject: MemStruct: Fix MemStruct __eq__ --- example/jitter/memstruct.py | 2 +- miasm2/analysis/mem.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index f2e9f8dc..775b3643 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -184,7 +184,7 @@ assert link.size == 2 # Make the Array Ptr point to the data's array field data.arrayptr = data.get_addr("array") # Now the pointer dereference is equal to the array field's value -assert data.deref_arrayptr == data.array +assert data.deref_arrayptr.value == data.array # Let's say that it is a DataStr: datastr = data.cast(DataStr) diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 2e52ec1a..86db0fc5 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -875,11 +875,7 @@ class MemStruct(object): return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) def __eq__(self, other): - # Do not test class equality, because of dynamically generated fields - # self.__class__ == other.__class__ and - # Could test attrs? 
- # TODO: self._attrs == other._attrs and - return str(self) == str(other) + return self.__class__ == other.__class__ and str(self) == str(other) def __ne__(self, other): return not self == other @@ -1216,6 +1212,13 @@ class MemSizedArray(MemArray): items = ', '.join(item_reprs) return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) + def __eq__(self, other): + # Special implementation to handle dynamic subclasses + return isinstance(other, MemSizedArray) and \ + self._field_type == other._field_type and \ + self._array_len == other._array_len and \ + str(self) == str(other) + def mem_array_type(field_type): """Generate a MemArray subclass that has a fixed @field_type. It allows to -- cgit 1.4.1 From 8bd16bab859480582f89962c878e867d6c8ab985 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Tue, 24 Nov 2015 19:16:03 +0100 Subject: MemStruct: fix example to use Ptr to MemSizedArray rather that Array --- example/jitter/memstruct.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 775b3643..934c1c22 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -7,7 +7,7 @@ as well. from miasm2.analysis.machine import Machine from miasm2.analysis.mem import MemStruct, MemSelf, MemVoid, MemStr,\ - Ptr, Num, Array, set_allocator + MemSizedArray, Ptr, Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap @@ -130,7 +130,7 @@ class DataArray(MemStruct): # MemStruct containing only one field named "value" will be created, so # that Ptr can point to a MemStruct instance. 
Here, # data_array.deref_array.value will allow to access an Array - ("arrayptr", Ptr(" Date: Thu, 26 Nov 2015 14:12:06 +0100 Subject: MemStruct example: fix memset usage --- example/jitter/memstruct.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 934c1c22..a79686a8 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -56,10 +56,6 @@ class LinkedList(MemStruct): ("size", Num(" Date: Fri, 27 Nov 2015 13:18:44 +0100 Subject: MemStruct: Big refactoring, Mem* -> Pinned* This commit is the first phase of the Type refactor. The PinnedType class has been separated from the more specific PinnedStruct class. --- example/jitter/memstruct.py | 48 ++--- miasm2/analysis/mem.py | 469 +++++++++++++++++++++++--------------------- test/analysis/mem.py | 112 +++++------ 3 files changed, 325 insertions(+), 304 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index a79686a8..645c019e 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -6,14 +6,14 @@ as well. """ from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, MemSelf, MemVoid, MemStr,\ - MemSizedArray, Ptr, Num, Array, set_allocator +from miasm2.analysis.mem import PinnedStruct, PinnedSelf, PinnedVoid, PinnedStr,\ + PinnedSizedArray, Ptr, Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap my_heap = heap() # And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the MemStruct constructor +# explicit address passing to the PinnedStruct constructor set_allocator(my_heap.vm_alloc) # Let's reimplement a simple C generic linked list mapped on a VmMngr! 
@@ -21,18 +21,18 @@ set_allocator(my_heap.vm_alloc) # All the structures and methods will use the python objects but all the data # is in fact stored in the VmMngr -class ListNode(MemStruct): +class ListNode(PinnedStruct): fields = [ # The " "u32" - class MyStruct(MemStruct): + class MyStruct(PinnedStruct): fields = [ # Integer field: just struct.pack fields with one value ("num", Num("I")), @@ -13,7 +13,7 @@ The main idea is to declare the fields of the structure in the class: # (self.deref_). Deref can be read and set. ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String - ("s", Ptr("I", MemStr)), + ("s", Ptr("I", PinnedStr)), ("i", Ptr("I", Num("I"))), ] @@ -32,18 +32,18 @@ structure will be automatically allocated in memory: # the allocator is a func(VmMngr) -> integer_address set_allocator(my_heap) -Note that some structures (e.g. MemStr or MemArray) do not have a static size -and cannot be allocated automatically. +Note that some structures (e.g. PinnedStr or PinnedArray) do not have a static +size and cannot be allocated automatically. As you saw previously, to use this module, you just have to inherit from -MemStruct and define a list of (, ). Available +PinnedStruct and define a list of (, ). Available Type classes are: - Num: for number (float or int) handling - RawStruct: abstraction over a simple struct pack/unpack - - Ptr: a pointer to another MemStruct instance - - Inline: include another MemStruct as a field (equivalent to having a + - Ptr: a pointer to another PinnedType instance + - FIXME: TODEL Inline: include another PinnedStruct as a field (equivalent to having a struct field into another struct in C) - Array: a fixed size array of Types (points) - Union: similar to `union` in C, list of Types at the same offset in a @@ -56,24 +56,25 @@ A Type always has a fixed size in memory. 
Some special memory structures are already implemented; they all are subclasses -of MemStruct with a custom implementation: - - - MemSelf: this class is just a special marker to reference a MemStruct - subclass inside itself. Works with Ptr and Array (e.g. Ptr(_, MemSelf) - for a pointer the same type as the class who uses this kind of field) - - MemVoid: empty MemStruct, placeholder to be casted to an implemented - MemStruct subclass - - MemStr: represents a string in memory; the encoding can be passed to the +of PinnedType with a custom implementation: + + - PinnedSelf: this class is just a special marker to reference a + PinnedStruct subclass inside itself. Works with Ptr and Array (e.g. + Ptr(_, PinnedSelf) for a pointer the same type as the class who uses this + kind of field) + - PinnedVoid: empty PinnedType, placeholder to be casted to an implemented + PinnedType subclass + - PinnedStr: represents a string in memory; the encoding can be passed to the constructor (null terminated ascii/ansi or null terminated utf16) - - MemArray: an unsized array of Type; unsized here means that there is + - PinnedArray: an unsized array of Type; unsized here means that there is no defined sized for this array, equivalent to a int* or char*-style table in C. It cannot be allocated automatically, since it has no known size - - MemSizedArray: a sized MemArray, can be automatically allocated in memory - and allows more operations than MemArray - - mem: a function that dynamically generates a MemStruct subclass from a + - PinnedSizedArray: a sized PinnedArray, can be automatically allocated in memory + and allows more operations than PinnedArray + - pin: a function that dynamically generates a PinnedStruct subclass from a Type. This class has only one field named "value". -A MemStruct do not always have a static size (cls.sizeof()) nor a dynamic size +A PinnedType do not always have a static size (cls.sizeof()) nor a dynamic size (self.get_size()). 
""" @@ -87,15 +88,15 @@ log.addHandler(console_handler) log.setLevel(logging.WARN) # ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a MemStruct class attribute +# TODO: as a PinnedType class attribute ALLOCATOR = None -# Cache for dynamically generated MemStructs +# Cache for dynamically generated PinnedTypes DYN_MEM_STRUCT_CACHE = {} def set_allocator(alloc_func): """Set an allocator for this module; allows to instanciate statically sized - MemStructs (i.e. sizeof() is implemented) without specifying the address + PinnedTypes (i.e. sizeof() is implemented) without specifying the address (the object is allocated by @alloc_func in the vm. @alloc_func: func(VmMngr) -> integer_address @@ -166,10 +167,10 @@ def set_str_utf16(vm, addr, s): vm.set_mem(addr, s) -# Type to MemStruct helper +# Type to PinnedType helper -def mem(field): - """Generate a MemStruct subclass from a field. The field's value can +def pin(field): + """Generate a PinnedStruct subclass from a field. The field's value can be accessed through self.value or self.deref_value if field is a Ptr. @field: a Type instance. @@ -179,7 +180,7 @@ def mem(field): fields = [("value", field)] # Build a type to contain the field type - mem_type = type("Mem%r" % field, (MemStruct,), {'fields': fields}) + mem_type = type("Pinned%r" % field, (PinnedStruct,), {'fields': fields}) DYN_MEM_STRUCT_CACHE[field] = mem_type return mem_type @@ -187,7 +188,7 @@ def mem(field): # Type classes class Type(object): - """Base class to provide methods to set and get fields from virtual mem. + """Base class to provide methods to set and get fields from virtual pin. Subclasses can either override _pack and _unpack, or get and set if data serialization requires more work (see Inline implementation for an example). @@ -225,8 +226,8 @@ class Type(object): return self._self_type def _set_self_type(self, self_type): - """If this field refers to MemSelf, replace it with @self_type (a - MemStruct subclass) when using it. 
Generally not used outside the lib. + """If this field refers to PinnedSelf, replace it with @self_type (a + PinnedType subclass) when using it. Generally not used outside the lib. """ self._self_type = self_type @@ -289,55 +290,55 @@ class Num(RawStruct): class Ptr(Num): """Special case of number of which value indicates the address of a - MemStruct. Provides deref_ as well as when used, to set and - get the pointed MemStruct. + PinnedType. Provides deref_ as well as when used, to set and + get the pointed PinnedType. """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): """ @fmt: (str) Num compatible format that will be the Ptr representation in memory - @dst_type: (MemStruct or Type) the MemStruct this Ptr points to. - If a Type is given, it is transformed into a MemStruct with - mem(TheType). + @dst_type: (PinnedType or Type) the PinnedType this Ptr points to. + If a Type is given, it is transformed into a PinnedType with + pin(TheType). *type_args, **type_kwargs: arguments to pass to the the pointed - MemStruct when instanciating it (e.g. for MemStr encoding or - MemArray field_type). + PinnedType when instanciating it (e.g. for PinnedStr encoding or + PinnedArray field_type). 
""" if (not isinstance(dst_type, Type) and not (isinstance(dst_type, type) and - issubclass(dst_type, MemStruct)) and - not dst_type == MemSelf): - raise ValueError("dst_type of Ptr must be a MemStruct type, a " - "Type instance, the MemSelf marker or a class " + issubclass(dst_type, PinnedType)) and + not dst_type == PinnedSelf): + raise ValueError("dst_type of Ptr must be a PinnedType type, a " + "Type instance, the PinnedSelf marker or a class " "name.") super(Ptr, self).__init__(fmt) if isinstance(dst_type, Type): - # Patch the field to propagate the MemSelf replacement + # Patch the field to propagate the PinnedSelf replacement dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # mem(dst_type), which will only return a type that does not point - # on MemSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = mem(dst_type) is not valid here, it is done + # pin(dst_type), which will only return a type that does not point + # on PinnedSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaPinnedStruct. 
+ # In short, dst_type = pin(dst_type) is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args self._type_kwargs = type_kwargs def _fix_dst_type(self): - if self._dst_type == MemSelf: + if self._dst_type == PinnedSelf: if self._get_self_type() is not None: self._dst_type = self._get_self_type() else: - raise ValueError("Unsupported usecase for MemSelf, sorry") + raise ValueError("Unsupported usecase for PinnedSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = mem(self._dst_type) + self._dst_type = pin(self._dst_type) @property def dst_type(self): - """Return the type (MemStruct subtype) this Ptr points to.""" + """Return the type (PinnedType subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type @@ -348,7 +349,7 @@ class Ptr(Num): return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): - """Serializes the @val MemStruct subclass instance in @vm (VmMngr) at + """Serializes the @val PinnedType subclass instance in @vm (VmMngr) at @addr. Equivalent to a pointer dereference assignment in C. """ # Sanity check @@ -374,29 +375,29 @@ class Ptr(Num): class Inline(Type): - """Field used to inline a MemStruct in another MemStruct. Equivalent to + """Field used to inline a PinnedType in another PinnedType. Equivalent to having a struct field in a C struct. 
Concretely: - class MyStructClass(MemStruct): + class MyStructClass(PinnedStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] - class Example(MemStruct): + class Example(PinnedStruct): fields = [("mystruct", Inline(MyStructClass))] ex = Example(vm, addr) ex.mystruct.f2 = 3 # inlined structure field access ex.mystruct = MyStructClass(vm, addr2) # struct copy - It can be seen like a bridge to use a MemStruct as a Type + It can be seen like a bridge to use a PinnedStruct as a Type - TODO: make the Inline implicit when setting a field to be a MemStruct + TODO: make the Inline implicit when setting a field to be a PinnedStruct """ def __init__(self, inlined_type, *type_args, **type_kwargs): - if not issubclass(inlined_type, MemStruct): - raise ValueError("inlined type if Inline must be a MemStruct") + if not issubclass(inlined_type, PinnedStruct): + raise ValueError("inlined type if Inline must be a PinnedStruct") self._il_type = inlined_type self._type_args = type_args self._type_kwargs = type_kwargs @@ -428,16 +429,16 @@ class Array(Type): """A fixed size array (contiguous sequence) of a Type subclass elements. Similar to something like the char[10] type in C. - Getting an array field actually returns a MemSizedArray. Setting it is - possible with either a list or a MemSizedArray instance. Examples of syntax: + Getting an array field actually returns a PinnedSizedArray. Setting it is + possible with either a list or a PinnedSizedArray instance. 
Examples of syntax: - class Example(MemStruct): + class Example(PinnedStruct): fields = [("array", Array(Num("B"), 4))] mystruct = Example(vm, addr) mystruct.array[3] = 27 mystruct.array = [1, 4, 8, 9] - mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4) + mystruct.array = PinnedSizedArray(vm, addr2, Num("B"), 4) """ def __init__(self, field_type, array_len): @@ -449,17 +450,17 @@ class Array(Type): self.field_type._set_self_type(self_type) def set(self, vm, addr, val): - # MemSizedArray assignment - if isinstance(val, MemSizedArray): + # PinnedSizedArray assignment + if isinstance(val, PinnedSizedArray): if val.array_len != self.array_len or len(val) != self.size(): - raise ValueError("Size mismatch in MemSizedArray assignment") + raise ValueError("Size mismatch in PinnedSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) # list assignment elif isinstance(val, list): if len(val) != self.array_len: - raise ValueError("Size mismatch in MemSizedArray assignment ") + raise ValueError("Size mismatch in PinnedSizedArray assignment ") offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) @@ -467,10 +468,10 @@ class Array(Type): else: raise RuntimeError( - "Assignment only implemented for list and MemSizedArray") + "Assignment only implemented for list and PinnedSizedArray") def get(self, vm, addr): - return MemSizedArray(vm, addr, self.field_type, self.array_len) + return PinnedSizedArray(vm, addr, self.field_type, self.array_len) def size(self): return self.field_type.size() * self.array_len @@ -488,12 +489,12 @@ class Array(Type): class Union(Type): - """Allows to put multiple fields at the same offset in a MemStruct, similar + """Allows to put multiple fields at the same offset in a PinnedStruct, similar to unions in C. The Union will have the size of the largest of its fields. Example: - class Example(MemStruct): + class Example(PinnedStruct): fields = [("uni", Union([ ("f1", Num(". 
+ useless most of the time since fields are accessible via self.. """ if name not in self._attrs: - raise AttributeError("'%s' object has no attribute '%s'" + raise attributeerror("'%s' object has no attribute '%s'" % (self.__class__.__name__, name)) field = self._attrs[name]["field"] offset = self._attrs[name]["offset"] return field.get(self._vm, self.get_addr() + offset) def set_field(self, name, val): - """Set a field value by name. @val is the python value corresponding to + """set a field value by name. @val is the python value corresponding to this field type. - Useless most of the time since fields are accessible via self.. + useless most of the time since fields are accessible via self.. """ if name not in self._attrs: - raise AttributeError("'%s' object has no attribute '%s'" + raise attributeerror("'%s' object has no attribute '%s'" % (self.__class__.__name__, name)) field = self._attrs[name]["field"] offset = self._attrs[name]["offset"] field.set(self._vm, self.get_addr() + offset, val) def deref_field(self, name): - """Get the MemStruct pointed by field. + """get the memstruct pointed by field. - Useless most of the time since fields are accessible via + useless most of the time since fields are accessible via self.deref_. """ addr = self.get_field(name) field = self._attrs[name]["field"] assert isinstance(field, Ptr),\ - "Programming error: field should be a Ptr" + "programming error: field should be a Ptr" return field.deref_get(self._vm, addr) def set_deref_field(self, name, val): - """Set the MemStruct pointed by field. @val should be of the - type of the pointed MemStruct. The field must be a Ptr. + """set the memstruct pointed by field. @val should be of the + type of the pointed memstruct. the field must be a Ptr. - Useless most of the time since fields are accessible via + useless most of the time since fields are accessible via self.deref_. 
""" addr = self.get_field(name) field = self._attrs[name]["field"] assert isinstance(field, Ptr),\ - "Programming error: field should be a Ptr" + "programming error: field should be a Ptr" field.deref_set(self._vm, addr, val) - def memset(self, byte='\x00'): - """Fill the memory space of this MemStruct with @byte ('\x00' by - default). The size is retrieved with self.get_size() (dynamic size). - """ - # TODO: multibyte patterns - if not isinstance(byte, str) or not len(byte) == 1: - raise ValueError("byte must be a 1-lengthed str") - self._vm.set_mem(self.get_addr(), byte * self.get_size()) - - def cast(self, other_type, *type_args, **type_kwargs): - """Cast this MemStruct to another MemStruct (same address, same vm, but - different type). Return the casted MemStruct. + def cast_field(self, field, other_type, *type_args, **type_kwargs): """ - return self.cast_field(None, other_type, *type_args, **type_kwargs) - - def cast_field(self, field_name, other_type, *type_args, **type_kwargs): - """Same as cast, but the address of the returned MemStruct is the - address at which @field_name is in the current MemStruct. + @field: a field name """ - return other_type(self._vm, self.get_addr(field_name), + return other_type(self._vm, self.get_addr(field), *type_args, **type_kwargs) - def __len__(self): - return self.get_size() - - def raw(self): - """Raw binary (str) representation of the MemStruct as it is in - memory. 
- """ - attrs = sorted(self._attrs.itervalues(), key=lambda a: a["offset"]) - out = [] - for attr in attrs: - field = attr["field"] - offset = attr["offset"] - out.append(self._vm.get_mem(self.get_addr() + offset, field.size())) - return ''.join(out) - - def __str__(self): - return self.raw() - - def __repr__(self): - attrs = sorted(self._attrs.iteritems(), key=lambda a: a[1]["offset"]) - out = [] - for name, attr in attrs: - field = attr["field"] - val_repr = repr(self.get_field(name)) - if '\n' in val_repr: - val_repr = '\n' + indent(val_repr, 4) - out.append("%s: %r = %s" % (name, field, val_repr)) - return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) - - def __eq__(self, other): - return self.__class__ == other.__class__ and str(self) == str(other) - - def __ne__(self, other): - return not self == other # Field generation methods, voluntarily public to be able to regen fields # after class definition @@ -896,18 +917,18 @@ class MemStruct(object): Useful in case of a type cyclic dependency. For example, the following is not possible in python: - class A(MemStruct): + class A(PinnedStruct): fields = [("b", Ptr("I", B))] - class B(MemStruct): + class B(PinnedStruct): fields = [("a", Ptr("I", A))] With gen_fields, the following is the legal equivalent: - class A(MemStruct): + class A(PinnedStruct): pass - class B(MemStruct): + class B(PinnedStruct): fields = [("a", Ptr("I", A))] A.fields = [("b", Ptr("I", B))] @@ -962,21 +983,21 @@ class MemStruct(object): cls.gen_field(name, field, offset) -class MemSelf(MemStruct): +class PinnedSelf(PinnedStruct): """Special Marker class for reference to current class in a Ptr or Array (mostly Array of Ptr). Example: - class ListNode(MemStruct): + class ListNode(PinnedStruct): fields = [ - ("next", Ptr("). Deref can be read and set. 
("other", Ptr("I", OtherStruct)), # Ptr to a variable length String - ("s", Ptr("I", MemStr)), + ("s", Ptr("I", PinnedStr)), ("i", Ptr("I", Num("I"))), ] @@ -43,7 +43,7 @@ addr_str3 = 0x1300 jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) -# MemStruct tests +# PinnedStruct tests ## Creation # Use manual allocation with explicit addr for the first example mstruct = MyStruct(jitter.vm, addr) @@ -57,7 +57,7 @@ assert mstruct.num == 3 memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] assert memval == 3 -## Memset sets the whole structure +## Pinnedset sets the whole structure mstruct.memset() assert mstruct.num == 0 assert mstruct.flags == 0 @@ -103,7 +103,7 @@ assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address assert other == other2 # But same value -## Same stuff for Ptr to MemField +## Same stuff for Ptr to PinnedField alloc_addr = my_heap.vm_alloc(jitter.vm, mstruct.get_field_type("i").dst_type.sizeof()) mstruct.i = alloc_addr @@ -116,7 +116,7 @@ assert memval == 8 # Str tests ## Basic tests -memstr = MemStr(jitter.vm, addr_str) +memstr = PinnedStr(jitter.vm, addr_str) memstr.value = "" assert memstr.value == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' @@ -125,7 +125,7 @@ assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') assert memstr.value == 'MIAMs' -## Ptr(MemStr) manipulations +## Ptr(PinnedStr) manipulations mstruct.s = memstr.get_addr() assert mstruct.s == addr_str assert mstruct.deref_s == memstr @@ -135,25 +135,25 @@ assert mstruct.deref_s.value == "That's all folks!" assert memstr.value == "That's all folks!" ## Other address, same value, same encoding -memstr2 = MemStr(jitter.vm, addr_str2) +memstr2 = PinnedStr(jitter.vm, addr_str2) memstr2.value = "That's all folks!" 
assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding -memstr3 = MemStr(jitter.vm, addr_str3, "utf16") +memstr3 = PinnedStr(jitter.vm, addr_str3, "utf16") memstr3.value = "That's all folks!" assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Mem representation is different +assert str(memstr3) != str(memstr) # Pinned representation is different assert memstr3 != memstr # Encoding is different, so they are not eq assert memstr3.value == memstr.value # But the python value is the same -# MemArray tests +# PinnedArray tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = MemArray(jitter.vm, alloc_addr, Num("I")) +memarray = PinnedArray(jitter.vm, alloc_addr, Num("I")) # This also works: _memarray = mem_array_type(Num("I"))(jitter.vm, alloc_addr) memarray[0] = 0x02 @@ -193,8 +193,8 @@ except ValueError: pass -# MemSizedArray tests -memsarray = MemSizedArray(jitter.vm, None, Num("I"), 10) +# PinnedSizedArray tests +memsarray = PinnedSizedArray(jitter.vm, None, Num("I"), 10) # This also works: _memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm) # And mem_sized_array_type generates statically sized types @@ -212,7 +212,7 @@ assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) # Atypical fields (RawStruct and Array) -class MyStruct2(MemStruct): +class MyStruct2(PinnedStruct): fields = [ ("s1", RawStruct("=BI")), ("s2", Array(Num("B"), 10)), @@ -244,8 +244,8 @@ ms2.s2 = [1] * 10 for val in ms2.s2: assert val == 1 -### Field assignment (MemSizedArray) -array2 = MemSizedArray(jitter.vm, None, Num("B"), 10) +### Field assignment (PinnedSizedArray) +array2 = PinnedSizedArray(jitter.vm, None, Num("B"), 10) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -255,13 +255,13 @@ for val in ms2.s2: # Inline tests -class 
InStruct(MemStruct): +class InStruct(PinnedStruct): fields = [ ("foo", Num("B")), ("bar", Num("B")), ] -class ContStruct(MemStruct): +class ContStruct(PinnedStruct): fields = [ ("one", Num("B")), ("instruct", Inline(InStruct)), @@ -294,7 +294,7 @@ assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' # Union test -class UniStruct(MemStruct): +class UniStruct(PinnedStruct): fields = [ ("one", Num("B")), ("union", Union([ @@ -320,7 +320,7 @@ assert uni.instruct.bar == 0x22 # BitField test -class BitStruct(MemStruct): +class BitStruct(PinnedStruct): fields = [ ("flags", BitField(Num("H"), [ ("f1_1", 1), @@ -353,24 +353,24 @@ assert bit.f4_1 == 1 # Unhealthy ideas -class UnhealthyIdeas(MemStruct): +class UnhealthyIdeas(PinnedStruct): fields = [ - ("pastruct", Ptr("I", MemArray, RawStruct("=Bf"))), - ("apstr", Array(Ptr("I", MemStr), 10)), - ("pself", Ptr("I", MemSelf)), - ("apself", Array(Ptr("I", MemSelf), 2)), - ("ppself", Ptr("I", Ptr("I", MemSelf))), - ("pppself", Ptr("I", Ptr("I", Ptr("I", MemSelf)))), + ("pastruct", Ptr("I", PinnedArray, RawStruct("=Bf"))), + ("apstr", Array(Ptr("I", PinnedStr), 10)), + ("pself", Ptr("I", PinnedSelf)), + ("apself", Array(Ptr("I", PinnedSelf), 2)), + ("ppself", Ptr("I", Ptr("I", PinnedSelf))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", PinnedSelf)))), ] # Other way to handle self dependency and circular dependencies -# NOTE: in this case, MemSelf would have been fine +# NOTE: in this case, PinnedSelf would have been fine UnhealthyIdeas.fields.append( ("pppself2", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) # Regen all fields UnhealthyIdeas.gen_fields() -p_size = Ptr("I", MemVoid).size() +p_size = Ptr("I", PinnedVoid).size() ideas = UnhealthyIdeas(jitter.vm) ideas.memset() @@ -401,34 +401,34 @@ assert ideas.deref_pppself.deref_value.deref_value == ideas # Cast tests -# MemStruct cast -MemInt = mem(Num("I")) -MemShort = mem(Num("H")) -dword = MemInt(jitter.vm) +# PinnedStruct cast +PinnedInt = pin(Num("I")) 
+PinnedShort = pin(Num("H")) +dword = PinnedInt(jitter.vm) dword.value = 0x12345678 -assert isinstance(dword.cast(MemShort), MemShort) -assert dword.cast(MemShort).value == 0x5678 +assert isinstance(dword.cast(PinnedShort), PinnedShort) +assert dword.cast(PinnedShort).value == 0x5678 # Field cast ms2.s2[0] = 0x34 ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", MemShort).value == 0x1234 +assert ms2.cast_field("s2", PinnedShort).value == 0x1234 # Other method -assert MemShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 +assert PinnedShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert MemShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd +assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd # void* style cast -MemPtrVoid = mem(Ptr("I", MemVoid)) -MemPtrMyStruct = mem(Ptr("I", MyStruct)) -p = MemPtrVoid(jitter.vm) +PinnedPtrVoid = pin(Ptr("I", PinnedVoid)) +PinnedPtrMyStruct = pin(Ptr("I", MyStruct)) +p = PinnedPtrVoid(jitter.vm) p.value = mstruct.get_addr() assert p.deref_value.cast(MyStruct) == mstruct -assert p.cast(MemPtrMyStruct).deref_value == mstruct +assert p.cast(PinnedPtrMyStruct).deref_value == mstruct # Field equality tests assert RawStruct("IH") == RawStruct("IH") @@ -463,13 +463,13 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick mem(MemField)/MemField hash test: -assert mem(Num("f"))(jitter.vm, addr) == mem(Num("f"))(jitter.vm, addr) +# Quick pin(PinnedField)/PinnedField hash test: +assert pin(Num("f"))(jitter.vm, addr) == pin(Num("f"))(jitter.vm, addr) # Types are cached -assert mem(Num("f")) == mem(Num("f")) -assert mem(Num("d")) != mem(Num("f")) -assert mem(Union([("f1", Num("I")), ("f2", Num("H"))])) == \ - mem(Union([("f1", Num("I")), ("f2", Num("H"))])) +assert pin(Num("f")) == pin(Num("f")) +assert pin(Num("d")) != pin(Num("f")) +assert pin(Union([("f1", Num("I")), ("f2", 
Num("H"))])) == \ + pin(Union([("f1", Num("I")), ("f2", Num("H"))])) assert mem_array_type(Num("B")) == mem_array_type(Num("B")) assert mem_array_type(Num("I")) != mem_array_type(Num("B")) assert mem_sized_array_type(Num("B"), 20) == mem_sized_array_type(Num("B"), 20) @@ -485,8 +485,8 @@ print repr(cont), '\n' print repr(uni), '\n' print repr(bit), '\n' print repr(ideas), '\n' -print repr(mem(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' -print repr(mem(Num("f"))(jitter.vm, addr)), '\n' +print repr(pin(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' +print repr(pin(Num("f"))(jitter.vm, addr)), '\n' print repr(memarray) print repr(memsarray) print repr(memstr) -- cgit 1.4.1 From d19f4c1dbdd2f1f451d03551abb0e5ebf4d455be Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 29 Nov 2015 22:03:11 +0100 Subject: MemStruct: big refactor in process Doc is currently incoherent, impl will also be completed --- example/jitter/memstruct.py | 29 +-- miasm2/analysis/mem.py | 480 ++++++++++++++++++++++++-------------------- test/analysis/mem.py | 246 ++++++++++++----------- 3 files changed, 407 insertions(+), 348 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 645c019e..6e8e13af 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -36,15 +36,15 @@ class ListNode(PinnedStruct): ] def get_next(self): - if self.next == 0: + if self.next.val == 0: return None - return self.deref_next + return self.next.deref def get_data(self, data_type=None): if data_type is not None: - return self.deref_data.cast(data_type) + return self.data.deref.cast(data_type) else: - return self.deref_data + return self.data.deref class LinkedList(PinnedStruct): @@ -60,12 +60,12 @@ class LinkedList(PinnedStruct): """Returns the head ListNode instance""" if self.head == 0: return None - return self.deref_head + return self.head.deref def get_tail(self): if self.tail == 0: return None - return 
self.deref_tail + return self.tail.deref def push(self, data): # Allocate a new node @@ -112,7 +112,7 @@ class LinkedList(PinnedStruct): if not self.empty(): cur = self.get_head() while cur is not None: - yield cur.deref_data + yield cur.data.deref cur = cur.get_next() @@ -123,9 +123,9 @@ class DataArray(PinnedStruct): ("val1", Num("B")), ("val2", Num("B")), # Ptr can also be instanciated with a PinnedField as an argument, a special - # PinnedStruct containing only one field named "value" will be created, so + # PinnedStruct containing only one field named "val" will be created, so # that Ptr can point to a PinnedStruct instance. Here, - # data_array.deref_array.value will allow to access an Array + # data_array.array.deref.val will allow to access an Array ("arrayptr", Ptr(" as well as when used, to set and + PinnedType. + + FIXME: DOC + + Provides deref_ as well as when used, to set and get the pointed PinnedType. """ @@ -360,7 +364,7 @@ class Ptr(Num): else: raise ValueError("Unsupported usecase for PinnedSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = pin(self._dst_type) + self._dst_type = self._dst_type.pinned @property def dst_type(self): @@ -368,11 +372,28 @@ class Ptr(Num): self._fix_dst_type() return self._dst_type + def set(self, vm, addr, val): + if isinstance(val, PinnedType) and isinstance(val.get_type(), Ptr): + self.set_val(vm, addr, val.val) + else: + super(Ptr, self).set(vm, addr, val) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_val(self, vm, addr): + return super(Ptr, self).get(vm, addr) + + def set_val(self, vm, addr, val): + return super(Ptr, self).set(vm, addr, val) + def deref_get(self, vm, addr): """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. Equivalent to a pointer dereference rvalue in C. 
""" - return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) + dst_addr = self.get_val(vm, addr) + return self.dst_type(vm, dst_addr, + *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): """Serializes the @val PinnedType subclass instance in @vm (VmMngr) at @@ -384,7 +405,8 @@ class Ptr(Num): self._dst_type.__name__, val.__class__.__name__) # Actual job - vm.set_mem(addr, str(val)) + dst_addr = self.get_val(vm, addr) + vm.set_mem(dst_addr, str(val)) def _get_pinned_base_class(self): return PinnedPtr @@ -403,7 +425,7 @@ class Ptr(Num): self._type_args)) -class Inline(Type): +class Struct(Type): """Field used to inline a PinnedType in another PinnedType. Equivalent to having a struct field in a C struct. @@ -424,34 +446,145 @@ class Inline(Type): TODO: make the Inline implicit when setting a field to be a PinnedStruct """ - def __init__(self, inlined_type, *type_args, **type_kwargs): - if not issubclass(inlined_type, PinnedStruct): - raise ValueError("inlined type if Inline must be a PinnedStruct") - self._il_type = inlined_type - self._type_args = type_args - self._type_kwargs = type_kwargs + def __init__(self, name, fields): + self.name = name + # fields is immutable + self._fields = tuple(fields) + self._gen_fields() + + def _gen_fields(self): + """Precompute useful metadata on self.fields.""" + self._fields_desc = {} + offset = 0 + for name, field in self._fields: + # For reflexion + field._set_self_type(self) + self._gen_field(name, field, offset) + offset += field.size() + self._size = offset + + def _gen_field(self, name, field, offset): + """Generate only one field + + @name: (str) the name of the field + @field: (Type instance) the field type + @offset: (int) the offset of the field in the structure + """ + self._fields_desc[name] = {"field": field, "offset": offset} + + @property + def fields(self): + return self._fields def set(self, vm, addr, val): raw = str(val) vm.set_mem(addr, raw) def get(self, vm, addr): - 
return self._il_type(vm, addr) + return self.pinned(vm, addr) + + def get_field(self, vm, addr, name): + """get a field value by name. + + useless most of the time since fields are accessible via self.. + """ + if name not in self._fields_desc: + raise ValueError("'%s' type has no field '%s'" + % (self, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + return field.get(vm, addr + offset) + + def set_field(self, vm, addr, name, val): + """set a field value by name. @val is the python value corresponding to + this field type. + + useless most of the time since fields are accessible via self.. + """ + if name not in self._fields_desc: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + field.set(vm, addr + offset, val) def size(self): - return self._il_type.sizeof() + # Child classes can set self._size if their size is not the sum of + # their fields + return sum(a["field"].size() for a in self._fields_desc.itervalues()) + + def get_offset(self, field_name): + """ + @field_name: (str, optional) the name of the field to get the + offset of + """ + if field_name not in self._fields_desc: + raise ValueError("This structure has no %s field" % field_name) + return self._fields_desc[field_name]['offset'] + + def get_field_type(self, name): + """return the type subclass instance describing field @name.""" + # TODO: move it to Struct + return self._fields_desc[name]['field'] + + #def _build_pinned_type(self): + # mem_type = type("PinnedStruct%s" % self.name, + # (PinnedStruct,), + # {'_type': self}) + # return mem_type + + def _get_pinned_base_class(self): + return PinnedStruct def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self._il_type) + return "Struct%s" % self.name def __eq__(self, other): return self.__class__ == other.__class__ and \ - self._il_type == other._il_type and \ - self._type_args == other._type_args 
and \ - self._type_kwargs == other._type_kwargs + self.fields == other.fields and \ + self.name == other.name def __hash__(self): - return hash((self.__class__, self._il_type, self._type_args)) + # Only hash name, not fields, because if a field is a Ptr to this + # Struct type, an infinite loop occurs + return hash((self.__class__, self.name)) + + +class Union(Struct): + """Allows to put multiple fields at the same offset in a PinnedStruct, similar + to unions in C. The Union will have the size of the largest of its fields. + + Example: + + class Example(PinnedStruct): + fields = [("uni", Union([ + ("f1", Num(". """ - if name not in self._attrs: - raise attributeerror("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self._attrs[name]["field"] - offset = self._attrs[name]["offset"] - return field.get(self._vm, self.get_addr() + offset) + return self._type.get_field(self._vm, self.get_addr(), name) def set_field(self, name, val): """set a field value by name. @val is the python value corresponding to @@ -941,37 +1001,7 @@ class PinnedStruct(PinnedType): useless most of the time since fields are accessible via self.. """ - if name not in self._attrs: - raise attributeerror("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self._attrs[name]["field"] - offset = self._attrs[name]["offset"] - field.set(self._vm, self.get_addr() + offset, val) - - def deref_field(self, name): - """get the memstruct pointed by field. - - useless most of the time since fields are accessible via - self.deref_. - """ - addr = self.get_field(name) - field = self._attrs[name]["field"] - assert isinstance(field, Ptr),\ - "programming error: field should be a Ptr" - return field.deref_get(self._vm, addr) - - def set_deref_field(self, name, val): - """set the memstruct pointed by field. @val should be of the - type of the pointed memstruct. the field must be a Ptr. 
- - useless most of the time since fields are accessible via - self.deref_. - """ - addr = self.get_field(name) - field = self._attrs[name]["field"] - assert isinstance(field, Ptr),\ - "programming error: field should be a Ptr" - field.deref_set(self._vm, addr, val) + return self._type.set_field(self._vm, self.get_addr(), name, val) def cast_field(self, field, other_type, *type_args, **type_kwargs): """ @@ -981,7 +1011,7 @@ class PinnedStruct(PinnedType): *type_args, **type_kwargs) - # Field generation methods, voluntarily public to be able to regen fields + # Field generation methods, voluntarily public to be able to gen fields # after class definition @classmethod @@ -1006,56 +1036,59 @@ class PinnedStruct(PinnedType): class B(PinnedStruct): fields = [("a", Ptr("I", A))] - A.fields = [("b", Ptr("I", B))] - a.gen_field() + A.gen_fields([("b", Ptr("I", B))]) """ - if fields is None: - fields = cls.fields - cls._attrs = {} - offset = 0 - for name, field in cls.fields: - # For reflexion - field._set_self_type(cls) - cls.gen_field(name, field, offset) - offset += field.size() - cls._size = offset + if fields is not None: + if cls.fields is not None: + raise ValueError("Cannot regen fields of a class. Setting " + "cls.fields at class definition and calling " + "gen_fields are mutually exclusive.") + cls.fields = fields + + if cls._type is None: + if cls.fields is None: + raise ValueError("Cannot create a PinnedStruct subclass without" + " a cls._type or a cls.fields") + cls._type = cls._gen_type(cls.fields) + + if cls._type in DYN_MEM_STRUCT_CACHE: + # FIXME: Maybe a warning would be better? + raise RuntimeError("Another PinnedType has the same type as this " + "one. 
Use it instead.") + + # Register this class so that another one will not be created when + # calling cls._type.pinned + DYN_MEM_STRUCT_CACHE[cls._type] = cls + + cls._gen_attributes() @classmethod - def gen_field(cls, name, field, offset): - """Generate only one field - - @name: (str) the name of the field - @field: (Type instance) the field type - @offset: (int) the offset of the field in the structure - """ - cls._gen_simple_attr(name, field, offset) - if isinstance(field, Union): - cls._gen_union_attr(field, offset) + def _gen_attributes(cls): + # Generate self. getter and setters + for name, field in cls._type.fields: + setattr(cls, name, property( + lambda self, name=name: self.get_field(name), + lambda self, val, name=name: self.set_field(name, val) + )) @classmethod - def _gen_simple_attr(cls, name, field, offset): - cls._attrs[name] = {"field": field, "offset": offset} - - # Generate self. getter and setter - setattr(cls, name, property( - lambda self: self.get_field(name), - lambda self, val: self.set_field(name, val) - )) - - # Generate self.deref_ getter and setter if this field is a - # Ptr - if isinstance(field, Ptr): - setattr(cls, "deref_%s" % name, property( - lambda self: self.deref_field(name), - lambda self, val: self.set_deref_field(name, val) - )) + def _gen_type(cls, fields): + return Struct(cls.__name__, fields) + + def __repr__(self): + out = [] + for name, field in self._type.fields: + val_repr = repr(self.get_field(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + +class PinnedUnion(PinnedStruct): @classmethod - def _gen_union_attr(cls, union_field, offset): - if not isinstance(union_field, Union): - raise ValueError("field should be an Union instance") - for name, field in union_field.field_list: - cls.gen_field(name, field, offset) + def _gen_type(cls, fields): + return Union(fields) class 
PinnedSelf(PinnedStruct): @@ -1069,19 +1102,30 @@ class PinnedSelf(PinnedStruct): ("data", Ptr("). Deref can be read and set. + # TODO: comment ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String ("s", Ptr("I", PinnedStr)), @@ -61,15 +60,15 @@ assert memval == 3 mstruct.memset() assert mstruct.num == 0 assert mstruct.flags == 0 -assert mstruct.other == 0 -assert mstruct.s == 0 -assert mstruct.i == 0 +assert mstruct.other.val == 0 +assert mstruct.s.val == 0 +assert mstruct.i.val == 0 mstruct.memset('\x11') assert mstruct.num == 0x11111111 assert mstruct.flags == 0x11 -assert mstruct.other == 0x11111111 -assert mstruct.s == 0x11111111 -assert mstruct.i == 0x11111111 +assert mstruct.other.val == 0x11111111 +assert mstruct.s.val == 0x11111111 +assert mstruct.i.val == 0x11111111 # From now, just use heap.vm_alloc @@ -85,19 +84,21 @@ other.foo = 0x1234 assert other.foo == 0x1234 ## Basic usage -mstruct.other = other.get_addr() -assert mstruct.other == other.get_addr() -assert mstruct.deref_other == other -assert mstruct.deref_other.foo == 0x1234 +mstruct.other.val = other.get_addr() +# This also works for now: +# mstruct.other = other.get_addr() +assert mstruct.other.val == other.get_addr() +assert mstruct.other.deref == other +assert mstruct.other.deref.foo == 0x1234 ## Deref assignment other2 = OtherStruct(jitter.vm) other2.foo = 0xbeef -assert mstruct.deref_other != other2 -mstruct.deref_other = other2 -assert mstruct.deref_other == other2 -assert mstruct.deref_other.foo == 0xbeef -assert mstruct.other == other.get_addr() # Addr did not change +assert mstruct.other.deref != other2 +mstruct.other.deref = other2 +assert mstruct.other.deref == other2 +assert mstruct.other.deref.foo == 0xbeef +assert mstruct.other.val == other.get_addr() # Addr did not change assert other.foo == 0xbeef # Deref assignment copies by value assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address @@ -105,11 +106,12 @@ assert other == 
other2 # But same value ## Same stuff for Ptr to PinnedField alloc_addr = my_heap.vm_alloc(jitter.vm, - mstruct.get_field_type("i").dst_type.sizeof()) + mstruct.get_type().get_field_type("i") + .dst_type.sizeof()) mstruct.i = alloc_addr -mstruct.deref_i.value = 8 -assert mstruct.deref_i.value == 8 -assert mstruct.i == alloc_addr +mstruct.i.deref.val = 8 +assert mstruct.i.deref.val == 8 +assert mstruct.i.val == alloc_addr memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] assert memval == 8 @@ -117,37 +119,37 @@ assert memval == 8 # Str tests ## Basic tests memstr = PinnedStr(jitter.vm, addr_str) -memstr.value = "" -assert memstr.value == "" +memstr.val = "" +assert memstr.val == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' -memstr.value = "lala" +memstr.val = "lala" assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') -assert memstr.value == 'MIAMs' +assert memstr.val == 'MIAMs' ## Ptr(PinnedStr) manipulations -mstruct.s = memstr.get_addr() -assert mstruct.s == addr_str -assert mstruct.deref_s == memstr -assert mstruct.deref_s.value == 'MIAMs' -mstruct.deref_s.value = "That's all folks!" -assert mstruct.deref_s.value == "That's all folks!" -assert memstr.value == "That's all folks!" +mstruct.s.val = memstr.get_addr() +assert mstruct.s.val == addr_str +assert mstruct.s.deref == memstr +assert mstruct.s.deref.val == 'MIAMs' +mstruct.s.deref.val = "That's all folks!" +assert mstruct.s.deref.val == "That's all folks!" +assert memstr.val == "That's all folks!" ## Other address, same value, same encoding memstr2 = PinnedStr(jitter.vm, addr_str2) -memstr2.value = "That's all folks!" +memstr2.val = "That's all folks!" assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding memstr3 = PinnedStr(jitter.vm, addr_str3, "utf16") -memstr3.value = "That's all folks!" +memstr3.val = "That's all folks!" 
assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different assert str(memstr3) != str(memstr) # Pinned representation is different assert memstr3 != memstr # Encoding is different, so they are not eq -assert memstr3.value == memstr.value # But the python value is the same +assert memstr3.val == memstr.val # But the python value is the same # PinnedArray tests @@ -254,7 +256,7 @@ for val in ms2.s2: assert val == 2 -# Inline tests +# Inlining a PinnedType tests class InStruct(PinnedStruct): fields = [ ("foo", Num("B")), @@ -264,7 +266,7 @@ class InStruct(PinnedStruct): class ContStruct(PinnedStruct): fields = [ ("one", Num("B")), - ("instruct", Inline(InStruct)), + ("instruct", InStruct.get_type()), ("last", Num("B")), ] @@ -298,7 +300,7 @@ class UniStruct(PinnedStruct): fields = [ ("one", Num("B")), ("union", Union([ - ("instruct", Inline(InStruct)), + ("instruct", InStruct.get_type()), ("i", Num(">I")), ])), ("last", Num("B")), @@ -308,20 +310,21 @@ uni = UniStruct(jitter.vm) jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 assert uni.one == 0x00 -assert uni.instruct.foo == 0x01 -assert uni.instruct.bar == 0x02 -assert uni.i == 0x01020304 +assert uni.union.instruct.foo == 0x01 +assert uni.union.instruct.bar == 0x02 +assert uni.union.i == 0x01020304 assert uni.last == 0x05 -uni.instruct.foo = 0x02 -assert uni.i == 0x02020304 -uni.i = 0x11223344 -assert uni.instruct.foo == 0x11 -assert uni.instruct.bar == 0x22 +uni.union.instruct.foo = 0x02 +assert uni.union.i == 0x02020304 +uni.union.i = 0x11223344 +assert uni.union.instruct.foo == 0x11 +assert uni.union.instruct.bar == 0x22 # BitField test -class BitStruct(PinnedStruct): +class BitStruct(PinnedUnion): fields = [ + ("flags_num", Num("H")), ("flags", BitField(Num("H"), [ ("f1_1", 1), ("f2_5", 5), @@ -332,24 +335,24 @@ class BitStruct(PinnedStruct): bit = BitStruct(jitter.vm) 
bit.memset() -assert bit.flags == 0 -assert bit.f1_1 == 0 -assert bit.f2_5 == 0 -assert bit.f3_8 == 0 -assert bit.f4_1 == 0 -bit.f1_1 = 1 -bit.f2_5 = 0b10101 -bit.f3_8 = 0b10000001 -assert bit.flags == 0b0010000001101011 -assert bit.f1_1 == 1 -assert bit.f2_5 == 0b10101 -assert bit.f3_8 == 0b10000001 -assert bit.f4_1 == 0 -bit.flags = 0b1101010101011100 -assert bit.f1_1 == 0 -assert bit.f2_5 == 0b01110 -assert bit.f3_8 == 0b01010101 -assert bit.f4_1 == 1 +assert bit.flags_num == 0 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0 +assert bit.flags.f3_8 == 0 +assert bit.flags.f4_1 == 0 +bit.flags.f1_1 = 1 +bit.flags.f2_5 = 0b10101 +bit.flags.f3_8 = 0b10000001 +assert bit.flags_num == 0b0010000001101011 +assert bit.flags.f1_1 == 1 +assert bit.flags.f2_5 == 0b10101 +assert bit.flags.f3_8 == 0b10000001 +assert bit.flags.f4_1 == 0 +bit.flags_num = 0b1101010101011100 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0b01110 +assert bit.flags.f3_8 == 0b01010101 +assert bit.flags.f4_1 == 1 # Unhealthy ideas @@ -363,72 +366,83 @@ class UnhealthyIdeas(PinnedStruct): ("pppself", Ptr("I", Ptr("I", Ptr("I", PinnedSelf)))), ] -# Other way to handle self dependency and circular dependencies -# NOTE: in this case, PinnedSelf would have been fine -UnhealthyIdeas.fields.append( - ("pppself2", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) -# Regen all fields -UnhealthyIdeas.gen_fields() - p_size = Ptr("I", PinnedVoid).size() ideas = UnhealthyIdeas(jitter.vm) ideas.memset() ideas.pself = ideas.get_addr() -assert ideas == ideas.deref_pself +assert ideas == ideas.pself.deref ideas.apself[0] = ideas.get_addr() -assert ideas.apself.deref_get(0) == ideas +assert ideas.apself[0].deref == ideas ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.apself.deref_set(1, ideas) +ideas.apself[1].deref = ideas assert ideas.apself[1] != ideas.get_addr() -assert ideas.apself.deref_get(1) == ideas +assert ideas.apself[1].deref == ideas ideas.ppself = 
my_heap.vm_alloc(jitter.vm, p_size) -ideas.deref_ppself.value = ideas.get_addr() -assert ideas.deref_ppself.value == ideas.get_addr() -assert ideas.deref_ppself.deref_value == ideas +ideas.ppself.deref.val = ideas.get_addr() +assert ideas.ppself.deref.val == ideas.get_addr() +assert ideas.ppself.deref.deref == ideas -ideas.deref_ppself.value = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.deref_ppself.deref_value = ideas -assert ideas.deref_ppself.value != ideas.get_addr() -assert ideas.deref_ppself.deref_value == ideas +ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.ppself.deref.deref = ideas +assert ideas.ppself.deref.val != ideas.get_addr() +assert ideas.ppself.deref.deref == ideas ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.deref_pppself.value = my_heap.vm_alloc(jitter.vm, p_size) -ideas.deref_pppself.deref_value.value = ideas.get_addr() -assert ideas.deref_pppself.deref_value.deref_value == ideas +ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.deref.val = ideas.get_addr() +assert ideas.pppself.deref.deref.deref == ideas + + +# Circular dependencies +class A(PinnedStruct): + pass + +class B(PinnedStruct): + fields = [("a", Ptr("I", A)),] + +# Gen A's fields after declaration +A.gen_fields([("b", Ptr("I", B)),]) + +a = A(jitter.vm) +b = B(jitter.vm) +a.b.val = b.get_addr() +b.a.val = a.get_addr() +assert a.b.deref == b +assert b.a.deref == a # Cast tests # PinnedStruct cast -PinnedInt = pin(Num("I")) -PinnedShort = pin(Num("H")) +PinnedInt = Num("I").pinned +PinnedShort = Num("H").pinned dword = PinnedInt(jitter.vm) -dword.value = 0x12345678 +dword.val = 0x12345678 assert isinstance(dword.cast(PinnedShort), PinnedShort) -assert dword.cast(PinnedShort).value == 0x5678 +assert dword.cast(PinnedShort).val == 0x5678 # Field cast ms2.s2[0] = 0x34 ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", PinnedShort).value == 0x1234 +assert ms2.cast_field("s2", PinnedShort).val 
== 0x1234 # Other method -assert PinnedShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 +assert PinnedShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd +assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).val == 0xabcd # void* style cast -PinnedPtrVoid = pin(Ptr("I", PinnedVoid)) -PinnedPtrMyStruct = pin(Ptr("I", MyStruct)) +PinnedPtrVoid = Ptr("I", PinnedVoid).pinned +PinnedPtrMyStruct = Ptr("I", MyStruct).pinned p = PinnedPtrVoid(jitter.vm) -p.value = mstruct.get_addr() -assert p.deref_value.cast(MyStruct) == mstruct -assert p.cast(PinnedPtrMyStruct).deref_value == mstruct +p.val = mstruct.get_addr() +assert p.deref.cast(MyStruct) == mstruct +assert p.cast(PinnedPtrMyStruct).deref == mstruct # Field equality tests assert RawStruct("IH") == RawStruct("IH") @@ -438,11 +452,19 @@ assert Num(">I") != Num("I", MyStruct) != Ptr(" Date: Mon, 30 Nov 2015 10:06:35 +0100 Subject: MemStruct: Array/PinnedArray homogeneity Array access logic has moved to Array, Pinned(Sized)Array just contains the logic to interface with memory --- example/jitter/memstruct.py | 2 +- miasm2/analysis/mem.py | 208 ++++++++++++++++---------------------------- test/analysis/mem.py | 32 +++---- 3 files changed, 84 insertions(+), 158 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 6e8e13af..038622ba 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -126,7 +126,7 @@ class DataArray(PinnedStruct): # PinnedStruct containing only one field named "val" will be created, so # that Ptr can point to a PinnedStruct instance. 
Here, # data_array.array.deref.val will allow to access an Array - ("arrayptr", Ptr("= self.size()): + raise IndexError("Index %s out of bounds" % idx) + + def _get_pinned_base_class(self): + if self.is_sized(): + return PinnedSizedArray + else: + return PinnedArray def __repr__(self): return "%r[%s]" % (self.field_type, self.array_len) @@ -891,7 +930,9 @@ class PinnedType(object): return "Pinned%r" % self._type def __eq__(self, other): - return self.__class__ == other.__class__ and str(self) == str(other) + return self.__class__ == other.__class__ and \ + self.get_type() == other.get_type() and \ + str(self) == str(other) def __ne__(self, other): return not self == other @@ -1200,87 +1241,34 @@ class PinnedArray(PinnedType): Such a generated type can be instanciated with only vm and addr, as are other PinnedTypes. """ - _field_type = None - - def __init__(self, vm, addr=None, field_type=None): - if self._field_type is None: - self._field_type = field_type - if self._field_type is None: - raise NotImplementedError( - "Provide field_type to instanciate this class, " - "or generate a subclass with mem_array_type.") - # FIXME: use underlying Array type - super(PinnedArray, self).__init__(vm, addr, - Array(self._field_type, None)) @property def field_type(self): """Return the Type subclass instance that represents the type of this PinnedArray items. 
""" - return self._field_type - - def _normalize_idx(self, idx): - # Noop for this type - return idx - - def _normalize_slice(self, slice_): - start = slice_.start if slice_.start is not None else 0 - stop = slice_.stop if slice_.stop is not None else self.get_size() - step = slice_.step if slice_.step is not None else 1 - return slice(start, stop, step) + return self.get_type().field_type - def _check_bounds(self, idx): - idx = self._normalize_idx(idx) - if not isinstance(idx, (int, long)): - raise ValueError("index must be an int or a long") - if idx < 0: - raise IndexError("Index %s out of bounds" % idx) - - def index2addr(self, idx): - """Return the address corresponding to a given @index in this PinnedArray. - """ - self._check_bounds(idx) - addr = self.get_addr() + idx * self._field_type.size() - return addr + def get_addr(self, idx=0): + return self._addr + self.get_type().get_offset(idx) def __getitem__(self, idx): - if isinstance(idx, slice): - res = [] - idx = self._normalize_slice(idx) - for i in xrange(idx.start, idx.stop, idx.step): - res.append(self._field_type.get(self._vm, self.index2addr(i))) - return res - else: - return self._field_type.get(self._vm, self.index2addr(idx)) + return self.get_type().get_item(self._vm, self._addr, idx) def __setitem__(self, idx, item): - if isinstance(idx, slice): - idx = self._normalize_slice(idx) - if len(item) != len(xrange(idx.start, idx.stop, idx.step)): - raise ValueError("Mismatched lengths in slice assignment") - # TODO: izip - for i, val in zip(xrange(idx.start, idx.stop, idx.step), item): - self._field_type.set(self._vm, self.index2addr(i), val) - else: - self._field_type.set(self._vm, self.index2addr(idx), item) + self.get_type().set_item(self._vm, self._addr, idx, item) # just a shorthand def as_mem_str(self, encoding="ansi"): return self.cast(PinnedStr, encoding) - @classmethod - def sizeof(cls): - raise ValueError("%s is unsized, it has no static size (sizeof). " - "Use PinnedSizedArray instead." 
% cls) - def raw(self): raise ValueError("%s is unsized, which prevents from getting its full " "raw representation. Use PinnedSizedArray instead." % self.__class__) def __repr__(self): - return "[%r, ...] [%r]" % (self[0], self._field_type) + return "[%r, ...] [%r]" % (self[0], self.field_type) class PinnedSizedArray(PinnedArray): @@ -1290,44 +1278,17 @@ class PinnedSizedArray(PinnedArray): This type is dynamically sized. Generate a fixed @field_type and @array_len array which has a static size by using Array(type, size).pinned. """ - _array_len = None - - def __init__(self, vm, addr=None, field_type=None, array_len=None): - # Set the length before anything else to allow get_size() to work for - # allocation - if self._array_len is None: - self._array_len = array_len - super(PinnedSizedArray, self).__init__(vm, addr, field_type) - if self._array_len is None or self._field_type is None: - raise NotImplementedError( - "Provide field_type and array_len to instanciate this class, " - "or generate a subclass with Array(type, size).pinned.") @property def array_len(self): """The length, in number of elements, of this array.""" - return self._array_len - - def sizeof(cls): - raise ValueError("PinnedSizedArray is not statically sized. 
Use " - "Array(type, size).pinned to generate a type that is.") + return self.get_type().array_len def get_size(self): - return self._array_len * self._field_type.size() - - def _normalize_idx(self, idx): - if idx < 0: - return self.get_size() - idx - return idx - - def _check_bounds(self, idx): - if not isinstance(idx, int) and not isinstance(idx, long): - raise ValueError("index must be an int or a long") - if idx < 0 or idx >= self.get_size(): - raise IndexError("Index %s out of bounds" % idx) + return self.get_type().size() def __iter__(self): - for i in xrange(self._array_len): + for i in xrange(self.get_type().array_len): yield self[i] def raw(self): @@ -1335,32 +1296,9 @@ class PinnedSizedArray(PinnedArray): def __repr__(self): item_reprs = [repr(item) for item in self] - if self._array_len > 0 and '\n' in item_reprs[0]: + if self.array_len > 0 and '\n' in item_reprs[0]: items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' else: items = ', '.join(item_reprs) - return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) - - def __eq__(self, other): - # Special implementation to handle dynamic subclasses - return isinstance(other, PinnedSizedArray) and \ - self._field_type == other._field_type and \ - self._array_len == other._array_len and \ - str(self) == str(other) - - -def mem_array_type(field_type): - """Generate a PinnedArray subclass that has a fixed @field_type. It allows to - instanciate this class with only vm and addr argument, as are standard - PinnedTypes. 
- """ - cache_key = (field_type, None) - if cache_key in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[cache_key] - - array_type = type('PinnedArray_%r' % (field_type,), - (PinnedArray,), - {'_field_type': field_type}) - DYN_MEM_STRUCT_CACHE[cache_key] = array_type - return array_type + return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 60d9c569..e1a2861f 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -6,10 +6,9 @@ import struct from miasm2.analysis.machine import Machine from miasm2.analysis.mem import PinnedStruct, Num, Ptr, PinnedStr, PinnedArray,\ - PinnedSizedArray, Array, mem_array_type,\ - RawStruct, Union, BitField, PinnedSelf, \ - PinnedVoid, Bits, set_allocator, PinnedUnion, \ - Struct + PinnedSizedArray, Array, RawStruct, Union, \ + BitField, PinnedSelf, PinnedVoid, Bits, \ + set_allocator, PinnedUnion, Struct from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap @@ -155,9 +154,7 @@ assert memstr3.val == memstr.val # But the python value is the same # PinnedArray tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = PinnedArray(jitter.vm, alloc_addr, Num("I")) -# This also works: -_memarray = mem_array_type(Num("I"))(jitter.vm, alloc_addr) +memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -188,19 +185,10 @@ try: except ValueError: pass -try: - memarray[1, 2] - assert False, "Should raise, mismatched sizes" -except ValueError: - pass - -# PinnedSizedArray tests -memsarray = PinnedSizedArray(jitter.vm, None, Num("I"), 10) -# This also works: -_memsarray = Array(Num("I"), 10).pinned(jitter.vm) +memsarray = Array(Num("I"), 10).pinned(jitter.vm) # And Array(type, size).pinned generates statically sized types -assert _memsarray.sizeof() == len(memsarray) 
+assert memsarray.sizeof() == Num("I").size() * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc assert len(memsarray) == 10 * 4 @@ -247,7 +235,7 @@ for val in ms2.s2: assert val == 1 ### Field assignment (PinnedSizedArray) -array2 = PinnedSizedArray(jitter.vm, None, Num("B"), 10) +array2 = Array(Num("B"), 10).pinned(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -434,7 +422,7 @@ assert PinnedShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).val == 0xabcd +assert PinnedShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast PinnedPtrVoid = Ptr("I", PinnedVoid).pinned @@ -492,8 +480,8 @@ assert Num("f").pinned == Num("f").pinned assert Num("d").pinned != Num("f").pinned assert Union([("f1", Num("I")), ("f2", Num("H"))]).pinned == \ Union([("f1", Num("I")), ("f2", Num("H"))]).pinned -assert mem_array_type(Num("B")) == mem_array_type(Num("B")) -assert mem_array_type(Num("I")) != mem_array_type(Num("B")) +assert Array(Num("B")).pinned == Array(Num("B")).pinned +assert Array(Num("I")).pinned != Array(Num("B")).pinned assert Array(Num("B"), 20).pinned == Array(Num("B"), 20).pinned assert Array(Num("B"), 19).pinned != Array(Num("B"), 20).pinned -- cgit 1.4.1 From f21429370a65504745290c39ecb8113163976232 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 11:00:48 +0100 Subject: MemStruct: Str type --- example/jitter/memstruct.py | 20 ++++----- miasm2/analysis/mem.py | 99 +++++++++++++++++++++++++++------------------ test/analysis/mem.py | 34 ++++++++-------- 3 files changed, 86 insertions(+), 67 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 038622ba..5472798d 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -6,8 +6,8 @@ as well. 
""" from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import PinnedStruct, PinnedSelf, PinnedVoid, PinnedStr,\ - PinnedSizedArray, Ptr, Num, Array, set_allocator +from miasm2.analysis.mem import PinnedStruct, Self, Void, Str, Array, Ptr, \ + Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap @@ -29,10 +29,10 @@ class ListNode(PinnedStruct): # special marker PinnedSelf. # You could also set or modify ListNode.fields after the class # declaration and call ListNode.gen_fields() - ("next", Ptr(" Date: Mon, 30 Nov 2015 11:13:13 +0100 Subject: MemStruct: allow Type instance in cast --- example/jitter/memstruct.py | 6 +++--- miasm2/analysis/mem.py | 26 +++++++++++++++----------- test/analysis/mem.py | 3 +-- 3 files changed, 19 insertions(+), 16 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 5472798d..3b6358cd 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -214,10 +214,10 @@ print raw_miams = '\x00'.join('Miams') + '\x00'*3 raw_miams_array = [ord(c) for c in raw_miams] assert list(data.array)[:len(raw_miams_array)] == raw_miams_array -assert data.array.cast(Str("utf16").pinned) == memstr +assert data.array.cast(Str("utf16")) == memstr # Default is "ansi" -assert data.array.cast(Str().pinned) != memstr -assert data.array.cast(Str("utf16").pinned).val == memstr.val +assert data.array.cast(Str()) != memstr +assert data.array.cast(Str("utf16")).val == memstr.val print "See that the original array has been modified:" print repr(data) diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 9787a25e..4d9ac712 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -946,11 +946,16 @@ class PinnedType(object): raise ValueError("byte must be a 1-lengthed str") self._vm.set_mem(self.get_addr(), byte * self.get_size()) - def cast(self, other_type, *type_args, **type_kwargs): - """Cast this PinnedType to another 
PinnedType (same address, same vm, but - different type). Return the casted PinnedType. + def cast(self, other_type): + """Cast this PinnedType to another PinnedType (same address, same vm, + but different type). Return the casted PinnedType. + + @other_type: either a Type instance (other_type.pinned is used) or a + PinnedType subclass """ - return other_type(self._vm, self.get_addr(), *type_args, **type_kwargs) + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr()) def cast_field(self, field, other_type, *type_args, **type_kwargs): """ABSTRACT: Same as cast, but the address of the returned PinnedType @@ -958,6 +963,8 @@ class PinnedType(object): @field: field specification, for example its name for a struct, or an index in an array. See the subclass doc. + @other_type: either a Type instance (other_type.pinned is used) or a + PinnedType subclass """ raise NotImplementedError("Abstract") @@ -1074,12 +1081,13 @@ class PinnedStruct(PinnedType): """ return self._type.set_field(self._vm, self.get_addr(), name, val) - def cast_field(self, field, other_type, *type_args, **type_kwargs): + def cast_field(self, field, other_type): """ @field: a field name """ - return other_type(self._vm, self.get_addr(field), - *type_args, **type_kwargs) + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr(field)) # Field generation methods, voluntarily public to be able to gen fields @@ -1277,10 +1285,6 @@ class PinnedArray(PinnedType): def __setitem__(self, idx, item): self.get_type().set_item(self._vm, self._addr, idx, item) - # just a shorthand - def as_mem_str(self, encoding="ansi"): - return self.cast(Str(encoding).pinned) - def raw(self): raise ValueError("%s is unsized, which prevents from getting its full " "raw representation. Use PinnedSizedArray instead." 
% diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 8d4a56d3..90022fe9 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -426,11 +426,10 @@ assert PinnedShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast PinnedPtrVoid = Ptr("I", Void()).pinned -PinnedPtrMyStruct = Ptr("I", MyStruct).pinned p = PinnedPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct -assert p.cast(PinnedPtrMyStruct).deref == mstruct +assert p.cast(Ptr("I", MyStruct)).deref == mstruct # Field equality tests assert RawStruct("IH") == RawStruct("IH") -- cgit 1.4.1 From 31650c36e3c079445fe6c26fc0a40c1bd19da57d Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 15:25:27 +0100 Subject: MemStruct: Global doc update --- example/jitter/memstruct.py | 38 +++-- miasm2/analysis/mem.py | 386 ++++++++++++++++++++++++++------------------ test/analysis/mem.py | 4 +- 3 files changed, 254 insertions(+), 174 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 3b6358cd..77d65d17 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -13,10 +13,11 @@ from miasm2.os_dep.common import heap # Instanciate a heap my_heap = heap() # And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the PinnedStruct constructor +# explicit address passing to the PinnedType subclasses (like PinnedStruct) +# constructor set_allocator(my_heap.vm_alloc) -# Let's reimplement a simple C generic linked list mapped on a VmMngr! +# Let's reimplement a simple C generic linked list mapped on a VmMngr. 
# All the structures and methods will use the python objects but all the data # is in fact stored in the VmMngr @@ -24,14 +25,14 @@ set_allocator(my_heap.vm_alloc) class ListNode(PinnedStruct): fields = [ # The ", ),]; creates fields that correspond to + certain bits of the field; analogous to a Union of Bits (see Bits below) + - Str: a character string, with an encoding; not directly mapped to a C + type, it is a higher level notion provided for ease of use + - Void: analogous to C void, can be a placeholder in void*-style cases. + - Self: special marker to reference a Struct inside itself (FIXME: to + remove?) + +And some less common types: + + - Bits: mask only some bits of a Num + - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a + standard C type) + +For each type, the `.pinned` property returns a PinnedType subclass that +allows to access the field in memory. + + +The easiest way to use the API to declare and manipulate new structures is to +subclass PinnedStruct and define a list of (, ): # FIXME: "I" => "u32" class MyStruct(PinnedStruct): fields = [ - # Integer field: just struct.pack fields with one value + # Scalar field: just struct.pack field with one value ("num", Num("I")), ("flags", Num("B")), - # Ptr fields are Num, but they can also be dereferenced - # (self.deref_). Deref can be read and set. 
+ # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String - ("s", Ptr("I", PinnedStr)), + ("s", Ptr("I", Str())), ("i", Ptr("I", Num("I"))), ] @@ -22,8 +55,13 @@ And access the fields: mstruct = MyStruct(jitter.vm, addr) mstruct.num = 3 assert mstruct.num == 3 + mstruct.other.val = addr2 + # Also works: mstruct.other = addr2 - mstruct.deref_other = OtherStruct(jitter.vm, addr) + mstruct.other.deref = OtherStruct(jitter.vm, addr) + +PinnedUnion and PinnedBitField can also be subclassed, the `fields` field being +in the format expected by, respectively, Union and BitField. The `addr` argument can be omited if an allocator is set, in which case the structure will be automatically allocated in memory: @@ -34,46 +72,6 @@ structure will be automatically allocated in memory: Note that some structures (e.g. PinnedStr or PinnedArray) do not have a static size and cannot be allocated automatically. - - -As you saw previously, to use this module, you just have to inherit from -PinnedStruct and define a list of (, ). Available -Type classes are: - - - Num: for number (float or int) handling - - RawStruct: abstraction over a simple struct pack/unpack - - Ptr: a pointer to another PinnedType instance - - FIXME: TODEL Inline: include another PinnedStruct as a field (equivalent to having a - struct field into another struct in C) - - Array: a fixed size array of Types (points) - - Union: similar to `union` in C, list of Types at the same offset in a - structure; the union has the size of the biggest Type - - BitField: similar to C bitfields, a list of - [( as well as when used, to set and - get the pointed PinnedType. 
+ Mapped to PinnedPtr (see its doc for more info): + + assert isinstance(mystruct.ptr, PinnedPtr) + mystruct.ptr = 0x4000 # Assign the Ptr numeric value + mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value + assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value + mystruct.ptr.deref # Get the pointed PinnedType + mystruct.ptr.deref = other # Set the pointed PinnedType """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): @@ -352,6 +367,7 @@ class Ptr(Num): return self._dst_type def set(self, vm, addr, val): + """A Ptr field can be set with a PinnedPtr or an int""" if isinstance(val, PinnedType) and isinstance(val.get_type(), Ptr): self.set_val(vm, addr, val.val) else: @@ -361,9 +377,11 @@ class Ptr(Num): return self.pinned(vm, addr) def get_val(self, vm, addr): + """Get the numeric value of a Ptr""" return super(Ptr, self).get(vm, addr) def set_val(self, vm, addr, val): + """Set the numeric value of a Ptr""" return super(Ptr, self).set(vm, addr, val) def deref_get(self, vm, addr): @@ -391,7 +409,7 @@ class Ptr(Num): return PinnedPtr def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self._dst_type) + return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) def __eq__(self, other): return super(Ptr, self).__eq__(other) and \ @@ -405,24 +423,25 @@ class Ptr(Num): class Struct(Type): - """Field used to inline a PinnedType in another PinnedType. Equivalent to - having a struct field in a C struct. - - Concretely: + """Equivalent to a C struct type. Composed of a name, and a + (, ) list describing the fields + of the struct. - class MyStructClass(PinnedStruct): - fields = [("f1", Num("I")), ("f2", Num("I"))] + Mapped to PinnedStruct. - class Example(PinnedStruct): - fields = [("mystruct", Inline(MyStructClass))] + NOTE: The `.pinned` property of Struct creates classes on the fly. 
If an + equivalent structure is created by subclassing PinnedStruct, an exception + is raised to prevent creating multiple classes designating the same type. - ex = Example(vm, addr) - ex.mystruct.f2 = 3 # inlined structure field access - ex.mystruct = MyStructClass(vm, addr2) # struct copy + Example: + s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - It can be seen like a bridge to use a PinnedStruct as a Type + Toto1 = s.pinned - TODO: make the Inline implicit when setting a field to be a PinnedStruct + # This raises an exception, because it describes the same structure as + # Toto1 + class Toto(PinnedStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] """ def __init__(self, name, fields): @@ -438,18 +457,8 @@ class Struct(Type): for name, field in self._fields: # For reflexion field._set_self_type(self) - self._gen_field(name, field, offset) + self._fields_desc[name] = {"field": field, "offset": offset} offset += field.size() - self._size = offset - - def _gen_field(self, name, field, offset): - """Generate only one field - - @name: (str) the name of the field - @field: (Type instance) the field type - @offset: (int) the offset of the field in the structure - """ - self._fields_desc[name] = {"field": field, "offset": offset} @property def fields(self): @@ -463,22 +472,16 @@ class Struct(Type): return self.pinned(vm, addr) def get_field(self, vm, addr, name): - """get a field value by name. - - useless most of the time since fields are accessible via self.. - """ + """Get a field value by @name and base structure @addr in @vm VmMngr.""" if name not in self._fields_desc: - raise ValueError("'%s' type has no field '%s'" - % (self, name)) + raise ValueError("'%s' type has no field '%s'" % (self, name)) field = self.get_field_type(name) offset = self.get_offset(name) return field.get(vm, addr + offset) def set_field(self, vm, addr, name, val): - """set a field value by name. @val is the python value corresponding to - this field type. 
- - useless most of the time since fields are accessible via self.. + """Set a field value by @name and base structure @addr in @vm VmMngr. + @val is the python value corresponding to this field type. """ if name not in self._fields_desc: raise AttributeError("'%s' object has no attribute '%s'" @@ -488,9 +491,7 @@ class Struct(Type): field.set(vm, addr + offset, val) def size(self): - # Child classes can set self._size if their size is not the sum of - # their fields - return sum(a["field"].size() for a in self._fields_desc.itervalues()) + return sum(field.size() for _, field in self.fields) def get_offset(self, field_name): """ @@ -502,15 +503,14 @@ class Struct(Type): return self._fields_desc[field_name]['offset'] def get_field_type(self, name): - """return the type subclass instance describing field @name.""" - # TODO: move it to Struct + """Return the Type subclass instance describing field @name.""" return self._fields_desc[name]['field'] def _get_pinned_base_class(self): return PinnedStruct def __repr__(self): - return "Struct%s" % self.name + return "struct %s" % self.name def __eq__(self, other): return self.__class__ == other.__class__ and \ @@ -524,8 +524,13 @@ class Struct(Type): class Union(Struct): - """Allows to put multiple fields at the same offset in a PinnedStruct, similar - to unions in C. The Union will have the size of the largest of its fields. + """Represents a C union. + + Allows to put multiple fields at the same offset in a PinnedStruct, + similar to unions in C. The Union will have the size of the largest of its + fields. + + Mapped to PinnedUnion. 
Example: @@ -542,7 +547,7 @@ class Union(Struct): """ def __init__(self, field_list): - """field_list is a [(name, field)] list, see the class doc""" + """@field_list: a [(name, field)] list, see the class doc""" super(Union, self).__init__("union", field_list) def size(self): @@ -561,11 +566,18 @@ class Union(Struct): class Array(Type): - """A fixed size array (contiguous sequence) of a Type subclass - elements. Similar to something like the char[10] type in C. + """An array (contiguous sequence) of a Type subclass elements. + + Can be sized (similar to something like the char[10] type in C) or unsized + if no @array_len is given to the constructor (similar to char* used as an + array). + + Mapped to PinnedArray or PinnedSizedArray, depending on if the Array is + sized or not. Getting an array field actually returns a PinnedSizedArray. Setting it is - possible with either a list or a PinnedSizedArray instance. Examples of syntax: + possible with either a list or a PinnedSizedArray instance. Examples of + syntax: class Example(PinnedStruct): fields = [("array", Array(Num("B"), 4))] @@ -616,10 +628,14 @@ class Array(Type): "array_len instead." % self) def get_offset(self, idx): + """Returns the offset of the item at index @idx.""" return self.field_type.size() * idx def get_item(self, vm, addr, idx): - """idx can be a slice""" + """Get the item(s) at index @idx. + + @idx: int, long or slice + """ if isinstance(idx, slice): res = [] idx = self._normalize_slice(idx) @@ -630,6 +646,9 @@ class Array(Type): return self.field_type.get(vm, addr + self.get_offset(idx)) def set_item(self, vm, addr, idx, item): + """Sets one or multiple items in this array (@idx can be an int, long + or slice). 
+ """ if isinstance(idx, slice): idx = self._normalize_slice(idx) if len(item) != len(xrange(idx.start, idx.stop, idx.step)): @@ -641,6 +660,9 @@ class Array(Type): self.field_type.set(vm, addr + self.get_offset(idx), item) def is_sized(self): + """True if this is a sized array (non None self.array_len), False + otherwise. + """ return self.array_len is not None def _normalize_idx(self, idx): @@ -669,7 +691,7 @@ class Array(Type): return PinnedArray def __repr__(self): - return "%r[%s]" % (self.field_type, self.array_len) + return "%r[%s]" % (self.field_type, self.array_len or "unsized") def __eq__(self, other): return self.__class__ == other.__class__ and \ @@ -752,6 +774,8 @@ class BitField(Union): endian int, little endian short...). Can be seen (and implemented) as a Union of Bits fields. + Mapped to PinnedBitField. + Creates fields that allow to access the bitfield fields easily. Example: class Example(PinnedStruct): @@ -787,6 +811,9 @@ class BitField(Union): def set(self, vm, addr, val): self._num.set(vm, addr, val) + def _get_pinned_base_class(self): + return PinnedBitField + def __eq__(self, other): return self.__class__ == other.__class__ and \ self._num == other._num and super(BitField, self).__eq__(other) @@ -794,8 +821,23 @@ class BitField(Union): def __hash__(self): return hash((super(BitField, self).__hash__(), self._num)) + def __repr__(self): + fields_repr = ', '.join("%s: %r" % (name, field.bit_size) + for name, field in self.fields) + return "%s(%s)" % (self.__class__.__name__, fields_repr) + class Str(Type): + """A string type that handles encoding. This type is unsized (no static + size). + + The @encoding is passed to the constructor, and is currently either null + terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware + that the utf16 implementation is a bit buggy... + + Mapped to PinnedStr. 
+ """ + def __init__(self, encoding="ansi"): # TODO: encoding as lambda if encoding not in ["ansi", "utf16"]: @@ -828,6 +870,7 @@ class Str(Type): @property def enc(self): + """This Str's encoding name (as a str).""" return self._enc def _get_pinned_base_class(self): @@ -844,7 +887,10 @@ class Str(Type): class Void(Type): - """Represents the C void type.""" + """Represents the C void type. + + Mapped to PinnedVoid. + """ def _build_pinned_type(self): return PinnedVoid @@ -855,7 +901,20 @@ class Void(Type): def __hash__(self): return hash(self.__class__) + class Self(Void): + """Special marker to reference a type inside itself. + + Mapped to PinnedSelf. + + Example: + class ListNode(PinnedStruct): + fields = [ + ("next", Ptr("). Deref can be read and set. + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object ("other", Ptr("I", OtherStruct)), - ("i", Ptr("I", Num("I"))), # Ptr to a variable length String - ("s", Ptr("I", PinnedStr)), + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), ] mstruct = MyStruct(vm, addr) @@ -1046,11 +1122,17 @@ class PinnedStruct(PinnedType): other = OtherStruct(vm, addr2) mstruct.other = other.get_addr() - assert mstruct.other == other.get_addr() - assert mstruct.deref_other == other - assert mstruct.deref_other.foo == 0x1234 + assert mstruct.other.val == other.get_addr() + assert mstruct.other.deref == other + assert mstruct.other.deref.foo == 0x1234 - See the various Type doc for more information. + Note that: + MyStruct = Struct("MyStruct", ).pinned + is equivalent to the previous MyStruct declaration. + + See the various Type-s doc for more information. See PinnedStruct.gen_fields + doc for more information on how to handle recursive types and cyclic + dependencies. """ __metaclass__ = _MetaPinnedStruct fields = None @@ -1067,14 +1149,14 @@ class PinnedStruct(PinnedType): return self._addr + offset def get_field(self, name): - """get a field value by name. 
+ """Get a field value by name. useless most of the time since fields are accessible via self.. """ return self._type.get_field(self._vm, self.get_addr(), name) def set_field(self, name, val): - """set a field value by name. @val is the python value corresponding to + """Set a field value by name. @val is the python value corresponding to this field type. useless most of the time since fields are accessible via self.. @@ -1082,17 +1164,13 @@ class PinnedStruct(PinnedType): return self._type.set_field(self._vm, self.get_addr(), name, val) def cast_field(self, field, other_type): - """ - @field: a field name - """ + """In this implementation, @field is a field name""" if isinstance(other_type, Type): other_type = other_type.pinned return other_type(self._vm, self.get_addr(field)) - - # Field generation methods, voluntarily public to be able to gen fields + # Field generation method, voluntarily public to be able to gen fields # after class definition - @classmethod def gen_fields(cls, fields=None): """Generate the fields of this class (so that they can be accessed with @@ -1165,21 +1243,22 @@ class PinnedStruct(PinnedType): class PinnedUnion(PinnedStruct): + """Same as PinnedStruct but all fields have a 0 offset in the struct.""" @classmethod def _gen_type(cls, fields): return Union(fields) +class PinnedBitField(PinnedUnion): + """PinnedUnion of Bits(...) fields.""" + @classmethod + def _gen_type(cls, fields): + return BitField(fields) + + class PinnedSelf(PinnedStruct): """Special Marker class for reference to current class in a Ptr or Array - (mostly Array of Ptr). 
- - Example: - class ListNode(PinnedStruct): - fields = [ - ("next", Ptr(" Date: Mon, 30 Nov 2015 15:48:09 +0100 Subject: MemStruct: Pinned* renamed back to Mem* --- example/jitter/memstruct.py | 22 ++-- miasm2/analysis/mem.py | 250 ++++++++++++++++++++++---------------------- test/analysis/mem.py | 60 +++++------ 3 files changed, 166 insertions(+), 166 deletions(-) (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 77d65d17..4ddbea86 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -6,14 +6,14 @@ as well. """ from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import PinnedStruct, Self, Void, Str, Array, Ptr, \ +from miasm2.analysis.mem import MemStruct, Self, Void, Str, Array, Ptr, \ Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap my_heap = heap() # And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the PinnedType subclasses (like PinnedStruct) +# explicit address passing to the MemType subclasses (like MemStruct) # constructor set_allocator(my_heap.vm_alloc) @@ -22,7 +22,7 @@ set_allocator(my_heap.vm_alloc) # All the structures and methods will use the python objects but all the data # is in fact stored in the VmMngr -class ListNode(PinnedStruct): +class ListNode(MemStruct): fields = [ # The ", ): +subclass MemStruct and define a list of (, ): # FIXME: "I" => "u32" - class MyStruct(PinnedStruct): + class MyStruct(MemStruct): fields = [ # Scalar field: just struct.pack field with one value ("num", Num("I")), @@ -60,7 +60,7 @@ And access the fields: mstruct.other = addr2 mstruct.other.deref = OtherStruct(jitter.vm, addr) -PinnedUnion and PinnedBitField can also be subclassed, the `fields` field being +MemUnion and MemBitField can also be subclassed, the `fields` field being in the format expected by, respectively, Union and BitField. 
The `addr` argument can be omited if an allocator is set, in which case the @@ -70,7 +70,7 @@ structure will be automatically allocated in memory: # the allocator is a func(VmMngr) -> integer_address set_allocator(my_heap) -Note that some structures (e.g. PinnedStr or PinnedArray) do not have a static +Note that some structures (e.g. MemStr or MemArray) do not have a static size and cannot be allocated automatically. """ @@ -84,15 +84,15 @@ log.addHandler(console_handler) log.setLevel(logging.WARN) # ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a PinnedType class attribute +# TODO: as a MemType class attribute ALLOCATOR = None -# Cache for dynamically generated PinnedTypes +# Cache for dynamically generated MemTypes DYN_MEM_STRUCT_CACHE = {} def set_allocator(alloc_func): """Set an allocator for this module; allows to instanciate statically sized - PinnedTypes (i.e. sizeof() is implemented) without specifying the address + MemTypes (i.e. sizeof() is implemented) without specifying the address (the object is allocated by @alloc_func in the vm. @alloc_func: func(VmMngr) -> integer_address @@ -169,11 +169,11 @@ class Type(object): """Base class to provide methods to describe a type, as well as how to set and get fields from virtual mem. - Each Type subclass is linked to a PinnedType subclass (e.g. Struct to - PinnedStruct, Ptr to PinnedPtr, etc.). + Each Type subclass is linked to a MemType subclass (e.g. Struct to + MemStruct, Ptr to MemPtr, etc.). - When nothing is specified, PinnedValue is used to access the type in memory. - PinnedValue instances have one `.val` field, setting and getting it call + When nothing is specified, MemValue is used to access the type in memory. + MemValue instances have one `.val` field, setting and getting it call the set and get of the Type. 
Subclasses can either override _pack and _unpack, or get and set if data @@ -214,7 +214,7 @@ class Type(object): """Returns a class with a (vm, addr) constructor that allows to interact with this type in memory. - @return: a PinnedType subclass. + @return: a MemType subclass. """ if self in DYN_MEM_STRUCT_CACHE: return DYN_MEM_STRUCT_CACHE[self] @@ -223,26 +223,26 @@ class Type(object): return pinned_type def _build_pinned_type(self): - """Builds the PinnedType subclass allowing to interract with this type. + """Builds the MemType subclass allowing to interract with this type. Called by self.pinned when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() - pinned_type = type("Pinned%r" % self, (pinned_base_class,), + pinned_type = type("Mem%r" % self, (pinned_base_class,), {'_type': self}) return pinned_type def _get_pinned_base_class(self): - """Return the PinnedType subclass that maps this type in memory""" - return PinnedValue + """Return the MemType subclass that maps this type in memory""" + return MemValue def _get_self_type(self): """Used for the Self trick.""" return self._self_type def _set_self_type(self, self_type): - """If this field refers to PinnedSelf/Self, replace it with @self_type - (a PinnedType subclass) when using it. Generally not used outside this + """If this field refers to MemSelf/Self, replace it with @self_type + (a MemType subclass) when using it. Generally not used outside this module. """ self._self_type = self_type @@ -306,45 +306,45 @@ class Num(RawStruct): class Ptr(Num): """Special case of number of which value indicates the address of a - PinnedType. + MemType. 
- Mapped to PinnedPtr (see its doc for more info): + Mapped to MemPtr (see its doc for more info): - assert isinstance(mystruct.ptr, PinnedPtr) + assert isinstance(mystruct.ptr, MemPtr) mystruct.ptr = 0x4000 # Assign the Ptr numeric value mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value - mystruct.ptr.deref # Get the pointed PinnedType - mystruct.ptr.deref = other # Set the pointed PinnedType + mystruct.ptr.deref # Get the pointed MemType + mystruct.ptr.deref = other # Set the pointed MemType """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): """ @fmt: (str) Num compatible format that will be the Ptr representation in memory - @dst_type: (PinnedType or Type) the PinnedType this Ptr points to. - If a Type is given, it is transformed into a PinnedType with + @dst_type: (MemType or Type) the MemType this Ptr points to. + If a Type is given, it is transformed into a MemType with TheType.pinned. *type_args, **type_kwargs: arguments to pass to the the pointed - PinnedType when instanciating it (e.g. for PinnedStr encoding or - PinnedArray field_type). + MemType when instanciating it (e.g. for MemStr encoding or + MemArray field_type). 
""" if (not isinstance(dst_type, Type) and not (isinstance(dst_type, type) and - issubclass(dst_type, PinnedType)) and - not dst_type == PinnedSelf): - raise ValueError("dst_type of Ptr must be a PinnedType type, a " - "Type instance, the PinnedSelf marker or a class " + issubclass(dst_type, MemType)) and + not dst_type == MemSelf): + raise ValueError("dst_type of Ptr must be a MemType type, a " + "Type instance, the MemSelf marker or a class " "name.") super(Ptr, self).__init__(fmt) if isinstance(dst_type, Type): - # Patch the field to propagate the PinnedSelf replacement + # Patch the field to propagate the MemSelf replacement dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling # dst_type.pinned, which will only return a type that does not point - # on PinnedSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaPinnedStruct. + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. 
# In short, dst_type = dst_type.pinned is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type @@ -352,23 +352,23 @@ class Ptr(Num): self._type_kwargs = type_kwargs def _fix_dst_type(self): - if self._dst_type == PinnedSelf: + if self._dst_type == MemSelf: if self._get_self_type() is not None: self._dst_type = self._get_self_type() else: - raise ValueError("Unsupported usecase for PinnedSelf, sorry") + raise ValueError("Unsupported usecase for MemSelf, sorry") if isinstance(self._dst_type, Type): self._dst_type = self._dst_type.pinned @property def dst_type(self): - """Return the type (PinnedType subtype) this Ptr points to.""" + """Return the type (MemType subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type def set(self, vm, addr, val): - """A Ptr field can be set with a PinnedPtr or an int""" - if isinstance(val, PinnedType) and isinstance(val.get_type(), Ptr): + """A Ptr field can be set with a MemPtr or an int""" + if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): self.set_val(vm, addr, val.val) else: super(Ptr, self).set(vm, addr, val) @@ -393,7 +393,7 @@ class Ptr(Num): *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): - """Serializes the @val PinnedType subclass instance in @vm (VmMngr) at + """Serializes the @val MemType subclass instance in @vm (VmMngr) at @addr. Equivalent to a pointer dereference assignment in C. """ # Sanity check @@ -406,7 +406,7 @@ class Ptr(Num): vm.set_mem(dst_addr, str(val)) def _get_pinned_base_class(self): - return PinnedPtr + return MemPtr def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) @@ -427,10 +427,10 @@ class Struct(Type): (, ) list describing the fields of the struct. - Mapped to PinnedStruct. + Mapped to MemStruct. NOTE: The `.pinned` property of Struct creates classes on the fly. 
If an - equivalent structure is created by subclassing PinnedStruct, an exception + equivalent structure is created by subclassing MemStruct, an exception is raised to prevent creating multiple classes designating the same type. Example: @@ -440,7 +440,7 @@ class Struct(Type): # This raises an exception, because it describes the same structure as # Toto1 - class Toto(PinnedStruct): + class Toto(MemStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] """ @@ -507,7 +507,7 @@ class Struct(Type): return self._fields_desc[name]['field'] def _get_pinned_base_class(self): - return PinnedStruct + return MemStruct def __repr__(self): return "struct %s" % self.name @@ -526,15 +526,15 @@ class Struct(Type): class Union(Struct): """Represents a C union. - Allows to put multiple fields at the same offset in a PinnedStruct, + Allows to put multiple fields at the same offset in a MemStruct, similar to unions in C. The Union will have the size of the largest of its fields. - Mapped to PinnedUnion. + Mapped to MemUnion. Example: - class Example(PinnedStruct): + class Example(MemStruct): fields = [("uni", Union([ ("f1", Num(").pinned is equivalent to the previous MyStruct declaration. - See the various Type-s doc for more information. See PinnedStruct.gen_fields + See the various Type-s doc for more information. See MemStruct.gen_fields doc for more information on how to handle recursive types and cyclic dependencies. """ - __metaclass__ = _MetaPinnedStruct + __metaclass__ = _MetaMemStruct fields = None def get_addr(self, field_name=None): @@ -1179,18 +1179,18 @@ class PinnedStruct(PinnedType): Useful in case of a type cyclic dependency. 
For example, the following is not possible in python: - class A(PinnedStruct): + class A(MemStruct): fields = [("b", Ptr("I", B))] - class B(PinnedStruct): + class B(MemStruct): fields = [("a", Ptr("I", A))] With gen_fields, the following is the legal equivalent: - class A(PinnedStruct): + class A(MemStruct): pass - class B(PinnedStruct): + class B(MemStruct): fields = [("a", Ptr("I", A))] A.gen_fields([("b", Ptr("I", B))]) @@ -1204,13 +1204,13 @@ class PinnedStruct(PinnedType): if cls._type is None: if cls.fields is None: - raise ValueError("Cannot create a PinnedStruct subclass without" + raise ValueError("Cannot create a MemStruct subclass without" " a cls._type or a cls.fields") cls._type = cls._gen_type(cls.fields) if cls._type in DYN_MEM_STRUCT_CACHE: # FIXME: Maybe a warning would be better? - raise RuntimeError("Another PinnedType has the same type as this " + raise RuntimeError("Another MemType has the same type as this " "one. Use it instead.") # Register this class so that another one will not be created when @@ -1242,21 +1242,21 @@ class PinnedStruct(PinnedType): return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) -class PinnedUnion(PinnedStruct): - """Same as PinnedStruct but all fields have a 0 offset in the struct.""" +class MemUnion(MemStruct): + """Same as MemStruct but all fields have a 0 offset in the struct.""" @classmethod def _gen_type(cls, fields): return Union(fields) -class PinnedBitField(PinnedUnion): - """PinnedUnion of Bits(...) fields.""" +class MemBitField(MemUnion): + """MemUnion of Bits(...) fields.""" @classmethod def _gen_type(cls, fields): return BitField(fields) -class PinnedSelf(PinnedStruct): +class MemSelf(MemStruct): """Special Marker class for reference to current class in a Ptr or Array (mostly Array of Ptr). See Self doc. """ @@ -1264,7 +1264,7 @@ class PinnedSelf(PinnedStruct): return self.__class__.__name__ -class PinnedVoid(PinnedType): +class MemVoid(MemType): """Placeholder for e.g. 
Ptr to an undetermined type. Useful mostly when casted to another type. Allows to implement C's "void*" pattern. """ @@ -1274,8 +1274,8 @@ class PinnedVoid(PinnedType): return self.__class__.__name__ -class PinnedPtr(PinnedValue): - """Pinned version of a Ptr, provides two properties: +class MemPtr(MemValue): + """Mem version of a Ptr, provides two properties: - val, to set and get the numeric value of the Ptr - deref, to set and get the pointed type """ @@ -1299,7 +1299,7 @@ class PinnedPtr(PinnedValue): return "*%s" % hex(self.val) -class PinnedStr(PinnedValue): +class MemStr(MemValue): """Implements a string representation in memory. The string value can be got or set (with python str/unicode) through the @@ -1330,7 +1330,7 @@ class PinnedStr(PinnedValue): return "%r: %r" % (self.__class__, self.val) -class PinnedArray(PinnedType): +class MemArray(MemType): """An unsized array of type @field_type (a Type subclass instance). This class has no static or dynamic size. @@ -1345,7 +1345,7 @@ class PinnedArray(PinnedType): @property def field_type(self): """Return the Type subclass instance that represents the type of - this PinnedArray items. + this MemArray items. """ return self.get_type().field_type @@ -1360,15 +1360,15 @@ class PinnedArray(PinnedType): def raw(self): raise ValueError("%s is unsized, which prevents from getting its full " - "raw representation. Use PinnedSizedArray instead." % + "raw representation. Use MemSizedArray instead." % self.__class__) def __repr__(self): return "[%r, ...] [%r]" % (self[0], self.field_type) -class PinnedSizedArray(PinnedArray): - """A fixed size PinnedArray. +class MemSizedArray(MemArray): + """A fixed size MemArray. This type is dynamically sized. Generate a fixed @field_type and @array_len array which has a static size by using Array(type, size).pinned. 
diff --git a/test/analysis/mem.py b/test/analysis/mem.py index b6664cd2..6c7fc9e3 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -5,20 +5,20 @@ import struct from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import PinnedStruct, Num, Ptr, Str, \ +from miasm2.analysis.mem import MemStruct, Num, Ptr, Str, \ Array, RawStruct, Union, \ BitField, Self, Void, Bits, \ - set_allocator, PinnedUnion, Struct + set_allocator, MemUnion, Struct from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap # Two structures with some fields -class OtherStruct(PinnedStruct): +class OtherStruct(MemStruct): fields = [ ("foo", Num("H")), ] -class MyStruct(PinnedStruct): +class MyStruct(MemStruct): fields = [ # Number field: just struct.pack fields with one value ("num", Num("I")), @@ -43,7 +43,7 @@ addr_str3 = 0x1300 jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) -# PinnedStruct tests +# MemStruct tests ## Creation # Use manual allocation with explicit addr for the first example mstruct = MyStruct(jitter.vm, addr) @@ -57,7 +57,7 @@ assert mstruct.num == 3 memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] assert memval == 3 -## Pinnedset sets the whole structure +## Memset sets the whole structure mstruct.memset() assert mstruct.num == 0 assert mstruct.flags == 0 @@ -105,7 +105,7 @@ assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address assert other == other2 # But same value -## Same stuff for Ptr to PinnedField +## Same stuff for Ptr to MemField alloc_addr = my_heap.vm_alloc(jitter.vm, mstruct.get_type().get_field_type("i") .dst_type.sizeof()) @@ -148,7 +148,7 @@ memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) memstr3.val = "That's all folks!" 
assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Pinned representation is different +assert str(memstr3) != str(memstr) # Mem representation is different assert memstr3 != memstr # Encoding is different, so they are not eq assert memstr3.val == memstr.val # But the python value is the same @@ -204,7 +204,7 @@ assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) # Atypical fields (RawStruct and Array) -class MyStruct2(PinnedStruct): +class MyStruct2(MemStruct): fields = [ ("s1", RawStruct("=BI")), ("s2", Array(Num("B"), 10)), @@ -236,7 +236,7 @@ ms2.s2 = [1] * 10 for val in ms2.s2: assert val == 1 -### Field assignment (PinnedSizedArray) +### Field assignment (MemSizedArray) array2 = Array(Num("B"), 10).pinned(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: @@ -246,14 +246,14 @@ for val in ms2.s2: assert val == 2 -# Inlining a PinnedType tests -class InStruct(PinnedStruct): +# Inlining a MemType tests +class InStruct(MemStruct): fields = [ ("foo", Num("B")), ("bar", Num("B")), ] -class ContStruct(PinnedStruct): +class ContStruct(MemStruct): fields = [ ("one", Num("B")), ("instruct", InStruct.get_type()), @@ -286,7 +286,7 @@ assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' # Union test -class UniStruct(PinnedStruct): +class UniStruct(MemStruct): fields = [ ("one", Num("B")), ("union", Union([ @@ -312,7 +312,7 @@ assert uni.union.instruct.bar == 0x22 # BitField test -class BitStruct(PinnedUnion): +class BitStruct(MemUnion): fields = [ ("flags_num", Num("H")), ("flags", BitField(Num("H"), [ @@ -346,7 +346,7 @@ assert bit.flags.f4_1 == 1 # Unhealthy ideas -class UnhealthyIdeas(PinnedStruct): +class UnhealthyIdeas(MemStruct): fields = [ ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), ("apstr", Array(Ptr("I", Str()), 10)), @@ -387,10 +387,10 @@ assert ideas.pppself.deref.deref.deref == ideas # Circular 
dependencies -class A(PinnedStruct): +class A(MemStruct): pass -class B(PinnedStruct): +class B(MemStruct): fields = [("a", Ptr("I", A)),] # Gen A's fields after declaration @@ -405,30 +405,30 @@ assert b.a.deref == a # Cast tests -# PinnedStruct cast -PinnedInt = Num("I").pinned -PinnedShort = Num("H").pinned -dword = PinnedInt(jitter.vm) +# MemStruct cast +MemInt = Num("I").pinned +MemShort = Num("H").pinned +dword = MemInt(jitter.vm) dword.val = 0x12345678 -assert isinstance(dword.cast(PinnedShort), PinnedShort) -assert dword.cast(PinnedShort).val == 0x5678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).val == 0x5678 # Field cast ms2.s2[0] = 0x34 ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", PinnedShort).val == 0x1234 +assert ms2.cast_field("s2", MemShort).val == 0x1234 # Other method -assert PinnedShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 +assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert PinnedShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd +assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast -PinnedPtrVoid = Ptr("I", Void()).pinned -p = PinnedPtrVoid(jitter.vm) +MemPtrVoid = Ptr("I", Void()).pinned +p = MemPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct assert p.cast(Ptr("I", MyStruct)).deref == mstruct @@ -474,7 +474,7 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick PinnedField.pinned/PinnedField hash test +# Quick MemField.pinned/MemField hash test assert Num("f").pinned(jitter.vm, addr) == Num("f").pinned(jitter.vm, addr) # Types are cached assert Num("f").pinned == Num("f").pinned -- cgit 1.4.1 From ba2df16277d7d4deae118ed11e1e92cd478045ec Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 16:00:26 +0100 Subject: MemStruct/Types: Renamed analysis.mem to 
core.types --- example/jitter/memstruct.py | 234 -------- example/jitter/types.py | 234 ++++++++ miasm2/analysis/mem.py | 1399 ------------------------------------------- miasm2/core/types.py | 1399 +++++++++++++++++++++++++++++++++++++++++++ test/analysis/mem.py | 506 ---------------- test/core/types.py | 506 ++++++++++++++++ test/test_all.py | 5 +- 7 files changed, 2142 insertions(+), 2141 deletions(-) delete mode 100644 example/jitter/memstruct.py create mode 100644 example/jitter/types.py delete mode 100644 miasm2/analysis/mem.py create mode 100644 miasm2/core/types.py delete mode 100644 test/analysis/mem.py create mode 100644 test/core/types.py (limited to 'example') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py deleted file mode 100644 index 4ddbea86..00000000 --- a/example/jitter/memstruct.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python -"""This script is just a short example of common usages for miasm2.analysis.mem. -For a more complete view of what is possible, tests/analysis/mem.py covers -most of the module possibilities, and the module doc gives useful information -as well. -""" - -from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, Self, Void, Str, Array, Ptr, \ - Num, Array, set_allocator -from miasm2.os_dep.common import heap - -# Instanciate a heap -my_heap = heap() -# And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the MemType subclasses (like MemStruct) -# constructor -set_allocator(my_heap.vm_alloc) - -# Let's reimplement a simple C generic linked list mapped on a VmMngr. 
- -# All the structures and methods will use the python objects but all the data -# is in fact stored in the VmMngr - -class ListNode(MemStruct): - fields = [ - # The ", ),]; creates fields that correspond to - certain bits of the field; analogous to a Union of Bits (see Bits below) - - Str: a character string, with an encoding; not directly mapped to a C - type, it is a higher level notion provided for ease of use - - Void: analogous to C void, can be a placeholder in void*-style cases. - - Self: special marker to reference a Struct inside itself (FIXME: to - remove?) - -And some less common types: - - - Bits: mask only some bits of a Num - - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a - standard C type) - -For each type, the `.pinned` property returns a MemType subclass that -allows to access the field in memory. - - -The easiest way to use the API to declare and manipulate new structures is to -subclass MemStruct and define a list of (, ): - - # FIXME: "I" => "u32" - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -And access the fields: - - mstruct = MyStruct(jitter.vm, addr) - mstruct.num = 3 - assert mstruct.num == 3 - mstruct.other.val = addr2 - # Also works: - mstruct.other = addr2 - mstruct.other.deref = OtherStruct(jitter.vm, addr) - -MemUnion and MemBitField can also be subclassed, the `fields` field being -in the format expected by, respectively, Union and BitField. 
- -The `addr` argument can be omited if an allocator is set, in which case the -structure will be automatically allocated in memory: - - my_heap = miasm2.os_dep.common.heap() - # the allocator is a func(VmMngr) -> integer_address - set_allocator(my_heap) - -Note that some structures (e.g. MemStr or MemArray) do not have a static -size and cannot be allocated automatically. -""" - -import logging -import struct - -log = logging.getLogger(__name__) -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - -# ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a MemType class attribute -ALLOCATOR = None - -# Cache for dynamically generated MemTypes -DYN_MEM_STRUCT_CACHE = {} - -def set_allocator(alloc_func): - """Set an allocator for this module; allows to instanciate statically sized - MemTypes (i.e. sizeof() is implemented) without specifying the address - (the object is allocated by @alloc_func in the vm. - - @alloc_func: func(VmMngr) -> integer_address - """ - global ALLOCATOR - ALLOCATOR = alloc_func - - -# Helpers - -def indent(s, size=4): - """Indent a string with @size spaces""" - return ' '*size + ('\n' + ' '*size).join(s.split('\n')) - - -# FIXME: copied from miasm2.os_dep.common and fixed -def get_str_ansi(vm, addr, max_char=None): - """Get a null terminated ANSI encoded string from a VmMngr. - - @vm: VmMngr instance - @max_char: max number of characters to get in memory - """ - l = 0 - tmp = addr - while ((max_char is None or l < max_char) and - vm.get_mem(tmp, 1) != "\x00"): - tmp += 1 - l += 1 - return vm.get_mem(addr, l).decode("latin1") - - -# TODO: get_raw_str_utf16 for length calculus -def get_str_utf16(vm, addr, max_char=None): - """Get a (double) null terminated utf16 little endian encoded string from - a VmMngr. This encoding is mainly used in Windows. 
- - FIXME: the implementation do not work with codepoints that are encoded on - more than 2 bytes in utf16. - - @vm: VmMngr instance - @max_char: max number of bytes to get in memory - """ - l = 0 - tmp = addr - # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? - while ((max_char is None or l < max_char) and - vm.get_mem(tmp, 2) != "\x00\x00"): - tmp += 2 - l += 2 - s = vm.get_mem(addr, l) - return s.decode('utf-16le') - - -def set_str_ansi(vm, addr, s): - """Encode a string to null terminated ascii/ansi and set it in a VmMngr - memory. - - @vm: VmMngr instance - @addr: start address to serialize the string to - s: the str to serialize - """ - vm.set_mem(addr, s + "\x00") - - -def set_str_utf16(vm, addr, s): - """Same as set_str_ansi with (double) null terminated utf16 encoding.""" - s = (s + '\x00').encode('utf-16le') - vm.set_mem(addr, s) - - -# Type classes - -class Type(object): - """Base class to provide methods to describe a type, as well as how to set - and get fields from virtual mem. - - Each Type subclass is linked to a MemType subclass (e.g. Struct to - MemStruct, Ptr to MemPtr, etc.). - - When nothing is specified, MemValue is used to access the type in memory. - MemValue instances have one `.val` field, setting and getting it call - the set and get of the Type. - - Subclasses can either override _pack and _unpack, or get and set if data - serialization requires more work (see Struct implementation for an example). - - TODO: move any trace of vm and addr out of these classes? - """ - - _self_type = None - - def _pack(self, val): - """Serializes the python value @val to a raw str""" - raise NotImplementedError() - - def _unpack(self, raw_str): - """Deserializes a raw str to an object representing the python value - of this field. - """ - raise NotImplementedError() - - def set(self, vm, addr, val): - """Set a VmMngr memory from a value. 
- - @vm: VmMngr instance - @addr: the start adress in memory to set - @val: the python value to serialize in @vm at @addr - """ - raw = self._pack(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) - return self._unpack(raw) - - @property - def pinned(self): - """Returns a class with a (vm, addr) constructor that allows to - interact with this type in memory. - - @return: a MemType subclass. - """ - if self in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[self] - pinned_type = self._build_pinned_type() - DYN_MEM_STRUCT_CACHE[self] = pinned_type - return pinned_type - - def _build_pinned_type(self): - """Builds the MemType subclass allowing to interract with this type. - - Called by self.pinned when it is not in cache. - """ - pinned_base_class = self._get_pinned_base_class() - pinned_type = type("Mem%r" % self, (pinned_base_class,), - {'_type': self}) - return pinned_type - - def _get_pinned_base_class(self): - """Return the MemType subclass that maps this type in memory""" - return MemValue - - def _get_self_type(self): - """Used for the Self trick.""" - return self._self_type - - def _set_self_type(self, self_type): - """If this field refers to MemSelf/Self, replace it with @self_type - (a MemType subclass) when using it. Generally not used outside this - module. - """ - self._self_type = self_type - - def size(self): - """Return the size in bytes of the serialized version of this field""" - raise NotImplementedError() - - def __len__(self): - return self.size() - - def __neq__(self, other): - return not self == other - - -class RawStruct(Type): - """Dumb struct.pack/unpack field. Mainly used to factorize code. - - Value is a tuple corresponding to the struct @fmt passed to the constructor. 
- """ - - def __init__(self, fmt): - self._fmt = fmt - - def _pack(self, fields): - return struct.pack(self._fmt, *fields) - - def _unpack(self, raw_str): - return struct.unpack(self._fmt, raw_str) - - def size(self): - return struct.calcsize(self._fmt) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self._fmt) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._fmt == other._fmt - - def __hash__(self): - return hash((self.__class__, self._fmt)) - - -class Num(RawStruct): - """Represents a number (integer or float). The number is encoded with - a struct-style format which must represent only one value. - - TODO: use u32, i16, etc. for format. - """ - - def _pack(self, number): - return super(Num, self)._pack([number]) - - def _unpack(self, raw_str): - upck = super(Num, self)._unpack(raw_str) - if len(upck) != 1: - raise ValueError("Num format string unpacks to multiple values, " - "should be 1") - return upck[0] - - -class Ptr(Num): - """Special case of number of which value indicates the address of a - MemType. - - Mapped to MemPtr (see its doc for more info): - - assert isinstance(mystruct.ptr, MemPtr) - mystruct.ptr = 0x4000 # Assign the Ptr numeric value - mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value - assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value - mystruct.ptr.deref # Get the pointed MemType - mystruct.ptr.deref = other # Set the pointed MemType - """ - - def __init__(self, fmt, dst_type, *type_args, **type_kwargs): - """ - @fmt: (str) Num compatible format that will be the Ptr representation - in memory - @dst_type: (MemType or Type) the MemType this Ptr points to. - If a Type is given, it is transformed into a MemType with - TheType.pinned. - *type_args, **type_kwargs: arguments to pass to the the pointed - MemType when instanciating it (e.g. for MemStr encoding or - MemArray field_type). 
- """ - if (not isinstance(dst_type, Type) and - not (isinstance(dst_type, type) and - issubclass(dst_type, MemType)) and - not dst_type == MemSelf): - raise ValueError("dst_type of Ptr must be a MemType type, a " - "Type instance, the MemSelf marker or a class " - "name.") - super(Ptr, self).__init__(fmt) - if isinstance(dst_type, Type): - # Patch the field to propagate the MemSelf replacement - dst_type._get_self_type = lambda: self._get_self_type() - # dst_type cannot be patched here, since _get_self_type of the outer - # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point - # on MemSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.pinned is not valid here, it is done - # lazily in _fix_dst_type - self._dst_type = dst_type - self._type_args = type_args - self._type_kwargs = type_kwargs - - def _fix_dst_type(self): - if self._dst_type == MemSelf: - if self._get_self_type() is not None: - self._dst_type = self._get_self_type() - else: - raise ValueError("Unsupported usecase for MemSelf, sorry") - if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned - - @property - def dst_type(self): - """Return the type (MemType subtype) this Ptr points to.""" - self._fix_dst_type() - return self._dst_type - - def set(self, vm, addr, val): - """A Ptr field can be set with a MemPtr or an int""" - if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): - self.set_val(vm, addr, val.val) - else: - super(Ptr, self).set(vm, addr, val) - - def get(self, vm, addr): - return self.pinned(vm, addr) - - def get_val(self, vm, addr): - """Get the numeric value of a Ptr""" - return super(Ptr, self).get(vm, addr) - - def set_val(self, vm, addr, val): - """Set the numeric value of a Ptr""" - return super(Ptr, self).set(vm, addr, val) - - def deref_get(self, vm, addr): - """Deserializes 
the data in @vm (VmMngr) at @addr to self.dst_type. - Equivalent to a pointer dereference rvalue in C. - """ - dst_addr = self.get_val(vm, addr) - return self.dst_type(vm, dst_addr, - *self._type_args, **self._type_kwargs) - - def deref_set(self, vm, addr, val): - """Serializes the @val MemType subclass instance in @vm (VmMngr) at - @addr. Equivalent to a pointer dereference assignment in C. - """ - # Sanity check - if self.dst_type != val.__class__: - log.warning("Original type was %s, overriden by value of type %s", - self._dst_type.__name__, val.__class__.__name__) - - # Actual job - dst_addr = self.get_val(vm, addr) - vm.set_mem(dst_addr, str(val)) - - def _get_pinned_base_class(self): - return MemPtr - - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) - - def __eq__(self, other): - return super(Ptr, self).__eq__(other) and \ - self.dst_type == other.dst_type and \ - self._type_args == other._type_args and \ - self._type_kwargs == other._type_kwargs - - def __hash__(self): - return hash((super(Ptr, self).__hash__(), self.dst_type, - self._type_args)) - - -class Struct(Type): - """Equivalent to a C struct type. Composed of a name, and a - (, ) list describing the fields - of the struct. - - Mapped to MemStruct. - - NOTE: The `.pinned` property of Struct creates classes on the fly. If an - equivalent structure is created by subclassing MemStruct, an exception - is raised to prevent creating multiple classes designating the same type. 
- - Example: - s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - - Toto1 = s.pinned - - # This raises an exception, because it describes the same structure as - # Toto1 - class Toto(MemStruct): - fields = [("f1", Num("I")), ("f2", Num("I"))] - """ - - def __init__(self, name, fields): - self.name = name - # fields is immutable - self._fields = tuple(fields) - self._gen_fields() - - def _gen_fields(self): - """Precompute useful metadata on self.fields.""" - self._fields_desc = {} - offset = 0 - for name, field in self._fields: - # For reflexion - field._set_self_type(self) - self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() - - @property - def fields(self): - return self._fields - - def set(self, vm, addr, val): - raw = str(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - return self.pinned(vm, addr) - - def get_field(self, vm, addr, name): - """Get a field value by @name and base structure @addr in @vm VmMngr.""" - if name not in self._fields_desc: - raise ValueError("'%s' type has no field '%s'" % (self, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - return field.get(vm, addr + offset) - - def set_field(self, vm, addr, name, val): - """Set a field value by @name and base structure @addr in @vm VmMngr. - @val is the python value corresponding to this field type. 
- """ - if name not in self._fields_desc: - raise AttributeError("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - field.set(vm, addr + offset, val) - - def size(self): - return sum(field.size() for _, field in self.fields) - - def get_offset(self, field_name): - """ - @field_name: (str, optional) the name of the field to get the - offset of - """ - if field_name not in self._fields_desc: - raise ValueError("This structure has no %s field" % field_name) - return self._fields_desc[field_name]['offset'] - - def get_field_type(self, name): - """Return the Type subclass instance describing field @name.""" - return self._fields_desc[name]['field'] - - def _get_pinned_base_class(self): - return MemStruct - - def __repr__(self): - return "struct %s" % self.name - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.fields == other.fields and \ - self.name == other.name - - def __hash__(self): - # Only hash name, not fields, because if a field is a Ptr to this - # Struct type, an infinite loop occurs - return hash((self.__class__, self.name)) - - -class Union(Struct): - """Represents a C union. - - Allows to put multiple fields at the same offset in a MemStruct, - similar to unions in C. The Union will have the size of the largest of its - fields. - - Mapped to MemUnion. 
- - Example: - - class Example(MemStruct): - fields = [("uni", Union([ - ("f1", Num("= self.size()): - raise IndexError("Index %s out of bounds" % idx) - - def _get_pinned_base_class(self): - if self.is_sized(): - return MemSizedArray - else: - return MemArray - - def __repr__(self): - return "%r[%s]" % (self.field_type, self.array_len or "unsized") - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.field_type == other.field_type and \ - self.array_len == other.array_len - - def __hash__(self): - return hash((self.__class__, self.field_type, self.array_len)) - - -class Bits(Type): - """Helper class for BitField, not very useful on its own. Represents some - bits of a Num. - - The @backing_num is used to know how to serialize/deserialize data in vm, - but getting/setting this fields only affects bits from @bit_offset to - @bit_offset + @bits. Masking and shifting is handled by the class, the aim - is to provide a transparent way to set and get some bits of a num. 
- """ - - def __init__(self, backing_num, bits, bit_offset): - if not isinstance(backing_num, Num): - raise ValueError("backing_num should be a Num instance") - self._num = backing_num - self._bits = bits - self._bit_offset = bit_offset - - def set(self, vm, addr, val): - val_mask = (1 << self._bits) - 1 - val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 - - full_num_mask = (1 << num_size) - 1 - num_mask = (~(val_mask << self._bit_offset)) & full_num_mask - - num_val = self._num.get(vm, addr) - res_val = (num_val & num_mask) | val_shifted - self._num.set(vm, addr, res_val) - - def get(self, vm, addr): - val_mask = (1 << self._bits) - 1 - num_val = self._num.get(vm, addr) - res_val = (num_val >> self._bit_offset) & val_mask - return res_val - - def size(self): - return self._num.size() - - @property - def bit_size(self): - """Number of bits read/written by this class""" - return self._bits - - @property - def bit_offset(self): - """Offset in bits (beginning at 0, the LSB) from which to read/write - bits. - """ - return self._bit_offset - - def __repr__(self): - return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, - self._bit_offset, self._bit_offset + self._bits) - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and self._bits == other._bits and \ - self._bit_offset == other._bit_offset - - def __hash__(self): - return hash((self.__class__, self._num, self._bits, self._bit_offset)) - - -class BitField(Union): - """A C-like bitfield. - - Constructed with a list [(, )] and a - @backing_num. The @backing_num is a Num instance that determines the total - size of the bitfield and the way the bits are serialized/deserialized (big - endian int, little endian short...). Can be seen (and implemented) as a - Union of Bits fields. - - Mapped to MemBitField. - - Creates fields that allow to access the bitfield fields easily. 
Example: - - class Example(MemStruct): - fields = [("bf", BitField(Num("B"), [ - ("f1", 2), - ("f2", 4), - ("f3", 1) - ]) - )] - - ex = Example(vm, addr) - ex.memset() - ex.f2 = 2 - ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated - assert ex.f1 == 3 - assert ex.f2 == 2 - assert ex.f3 == 0 # previously memset() - assert ex.bf == 3 + 2 << 2 - """ - - def __init__(self, backing_num, bit_list): - """@backing num: Num intance, @bit_list: [(name, n_bits)]""" - self._num = backing_num - fields = [] - offset = 0 - for name, bits in bit_list: - fields.append((name, Bits(self._num, bits, offset))) - offset += bits - if offset > self._num.size() * 8: - raise ValueError("sum of bit lengths is > to the backing num size") - super(BitField, self).__init__(fields) - - def set(self, vm, addr, val): - self._num.set(vm, addr, val) - - def _get_pinned_base_class(self): - return MemBitField - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and super(BitField, self).__eq__(other) - - def __hash__(self): - return hash((super(BitField, self).__hash__(), self._num)) - - def __repr__(self): - fields_repr = ', '.join("%s: %r" % (name, field.bit_size) - for name, field in self.fields) - return "%s(%s)" % (self.__class__.__name__, fields_repr) - - -class Str(Type): - """A string type that handles encoding. This type is unsized (no static - size). - - The @encoding is passed to the constructor, and is currently either null - terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware - that the utf16 implementation is a bit buggy... - - Mapped to MemStr. 
- """ - - def __init__(self, encoding="ansi"): - # TODO: encoding as lambda - if encoding not in ["ansi", "utf16"]: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - self._enc = encoding - - def get(self, vm, addr): - """Set the string value in memory""" - if self._enc == "ansi": - get_str = get_str_ansi - elif self._enc == "utf16": - get_str = get_str_utf16 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - return get_str(vm, addr) - - def set(self, vm, addr, s): - """Get the string value from memory""" - if self._enc == "ansi": - set_str = set_str_ansi - elif self._enc == "utf16": - set_str = set_str_utf16 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - set_str(vm, addr, s) - - def size(self): - """This type is unsized.""" - raise ValueError("Str is unsized") - - @property - def enc(self): - """This Str's encoding name (as a str).""" - return self._enc - - def _get_pinned_base_class(self): - return MemStr - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.enc) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._enc == other._enc - - def __hash__(self): - return hash((self.__class__, self._enc)) - - -class Void(Type): - """Represents the C void type. - - Mapped to MemVoid. - """ - - def _build_pinned_type(self): - return MemVoid - - def __eq__(self, other): - return self.__class__ == other.__class__ - - def __hash__(self): - return hash(self.__class__) - - -class Self(Void): - """Special marker to reference a type inside itself. - - Mapped to MemSelf. - - Example: - class ListNode(MemStruct): - fields = [ - ("next", Ptr(", ) - - instances of this class will have properties to interract with these - fields. 
- - Example: - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - - mstruct = MyStruct(vm, addr) - - # Field assignment modifies virtual memory - mstruct.num = 3 - assert mstruct.num == 3 - memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), - 4))[0] - assert memval == mstruct.num - - # Memset sets the whole structure - mstruct.memset() - assert mstruct.num == 0 - mstruct.memset('\x11') - assert mstruct.num == 0x11111111 - - other = OtherStruct(vm, addr2) - mstruct.other = other.get_addr() - assert mstruct.other.val == other.get_addr() - assert mstruct.other.deref == other - assert mstruct.other.deref.foo == 0x1234 - - Note that: - MyStruct = Struct("MyStruct", ).pinned - is equivalent to the previous MyStruct declaration. - - See the various Type-s doc for more information. See MemStruct.gen_fields - doc for more information on how to handle recursive types and cyclic - dependencies. - """ - __metaclass__ = _MetaMemStruct - fields = None - - def get_addr(self, field_name=None): - """ - @field_name: (str, optional) the name of the field to get the - address of - """ - if field_name is not None: - offset = self._type.get_offset(field_name) - else: - offset = 0 - return self._addr + offset - - def get_field(self, name): - """Get a field value by name. - - useless most of the time since fields are accessible via self.. - """ - return self._type.get_field(self._vm, self.get_addr(), name) - - def set_field(self, name, val): - """Set a field value by name. @val is the python value corresponding to - this field type. - - useless most of the time since fields are accessible via self.. 
- """ - return self._type.set_field(self._vm, self.get_addr(), name, val) - - def cast_field(self, field, other_type): - """In this implementation, @field is a field name""" - if isinstance(other_type, Type): - other_type = other_type.pinned - return other_type(self._vm, self.get_addr(field)) - - # Field generation method, voluntarily public to be able to gen fields - # after class definition - @classmethod - def gen_fields(cls, fields=None): - """Generate the fields of this class (so that they can be accessed with - self.) from a @fields list, as described in the class doc. - - Useful in case of a type cyclic dependency. For example, the following - is not possible in python: - - class A(MemStruct): - fields = [("b", Ptr("I", B))] - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - With gen_fields, the following is the legal equivalent: - - class A(MemStruct): - pass - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - A.gen_fields([("b", Ptr("I", B))]) - """ - if fields is not None: - if cls.fields is not None: - raise ValueError("Cannot regen fields of a class. Setting " - "cls.fields at class definition and calling " - "gen_fields are mutually exclusive.") - cls.fields = fields - - if cls._type is None: - if cls.fields is None: - raise ValueError("Cannot create a MemStruct subclass without" - " a cls._type or a cls.fields") - cls._type = cls._gen_type(cls.fields) - - if cls._type in DYN_MEM_STRUCT_CACHE: - # FIXME: Maybe a warning would be better? - raise RuntimeError("Another MemType has the same type as this " - "one. Use it instead.") - - # Register this class so that another one will not be created when - # calling cls._type.pinned - DYN_MEM_STRUCT_CACHE[cls._type] = cls - - cls._gen_attributes() - - @classmethod - def _gen_attributes(cls): - # Generate self. 
getter and setters - for name, field in cls._type.fields: - setattr(cls, name, property( - lambda self, name=name: self.get_field(name), - lambda self, val, name=name: self.set_field(name, val) - )) - - @classmethod - def _gen_type(cls, fields): - return Struct(cls.__name__, fields) - - def __repr__(self): - out = [] - for name, field in self._type.fields: - val_repr = repr(self.get_field(name)) - if '\n' in val_repr: - val_repr = '\n' + indent(val_repr, 4) - out.append("%s: %r = %s" % (name, field, val_repr)) - return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) - - -class MemUnion(MemStruct): - """Same as MemStruct but all fields have a 0 offset in the struct.""" - @classmethod - def _gen_type(cls, fields): - return Union(fields) - - -class MemBitField(MemUnion): - """MemUnion of Bits(...) fields.""" - @classmethod - def _gen_type(cls, fields): - return BitField(fields) - - -class MemSelf(MemStruct): - """Special Marker class for reference to current class in a Ptr or Array - (mostly Array of Ptr). See Self doc. - """ - def __repr__(self): - return self.__class__.__name__ - - -class MemVoid(MemType): - """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when - casted to another type. Allows to implement C's "void*" pattern. 
- """ - _type = Void() - - def __repr__(self): - return self.__class__.__name__ - - -class MemPtr(MemValue): - """Mem version of a Ptr, provides two properties: - - val, to set and get the numeric value of the Ptr - - deref, to set and get the pointed type - """ - @property - def val(self): - return self._type.get_val(self._vm, self._addr) - - @val.setter - def val(self, value): - return self._type.set_val(self._vm, self._addr, value) - - @property - def deref(self): - return self._type.deref_get(self._vm, self._addr) - - @deref.setter - def deref(self, val): - return self._type.deref_set(self._vm, self._addr, val) - - def __repr__(self): - return "*%s" % hex(self.val) - - -class MemStr(MemValue): - """Implements a string representation in memory. - - The string value can be got or set (with python str/unicode) through the - self.val attribute. String encoding/decoding is handled by the class, - - This type is dynamically sized only (get_size is implemented, not sizeof). - """ - - def get_size(self): - """This get_size implementation is quite unsafe: it reads the string - underneath to determine the size, it may therefore read a lot of memory - and provoke mem faults (analogous to strlen). - """ - val = self.val - if self.get_type().enc == "ansi": - return len(val) + 1 - elif self.get_type().enc == "utf16": - # FIXME: real encoding... - return len(val) * 2 + 2 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - - def raw(self): - raw = self._vm.get_mem(self.get_addr(), self.get_size()) - return raw - - def __repr__(self): - return "%r: %r" % (self.__class__, self.val) - - -class MemArray(MemType): - """An unsized array of type @field_type (a Type subclass instance). - This class has no static or dynamic size. 
- - It can be indexed for setting and getting elements, example: - - array = Array(Num("I")).pinned(vm, addr)) - array[2] = 5 - array[4:8] = [0, 1, 2, 3] - print array[20] - """ - - @property - def field_type(self): - """Return the Type subclass instance that represents the type of - this MemArray items. - """ - return self.get_type().field_type - - def get_addr(self, idx=0): - return self._addr + self.get_type().get_offset(idx) - - def __getitem__(self, idx): - return self.get_type().get_item(self._vm, self._addr, idx) - - def __setitem__(self, idx, item): - self.get_type().set_item(self._vm, self._addr, idx, item) - - def raw(self): - raise ValueError("%s is unsized, which prevents from getting its full " - "raw representation. Use MemSizedArray instead." % - self.__class__) - - def __repr__(self): - return "[%r, ...] [%r]" % (self[0], self.field_type) - - -class MemSizedArray(MemArray): - """A fixed size MemArray. - - This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. 
- """ - - @property - def array_len(self): - """The length, in number of elements, of this array.""" - return self.get_type().array_len - - def get_size(self): - return self.get_type().size() - - def __iter__(self): - for i in xrange(self.get_type().array_len): - yield self[i] - - def raw(self): - return self._vm.get_mem(self.get_addr(), self.get_size()) - - def __repr__(self): - item_reprs = [repr(item) for item in self] - if self.array_len > 0 and '\n' in item_reprs[0]: - items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' - else: - items = ', '.join(item_reprs) - return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) - diff --git a/miasm2/core/types.py b/miasm2/core/types.py new file mode 100644 index 00000000..3c8d5b8b --- /dev/null +++ b/miasm2/core/types.py @@ -0,0 +1,1399 @@ +"""This module provides classes to manipulate C structures backed by a VmMngr +object (a miasm sandbox virtual memory). + +It provides two families of classes, Type-s (Num, Ptr, Str...) and their +associated MemType-s. A Type subclass instance represents a fully defined C +type. A MemType subclass instance represents a C LValue (or variable): it is +a type attached to the memory. Available types are: + + - Num: for number (float or int) handling + - Ptr: a pointer to another Type + - Struct: equivalent to a C struct definition + - Union: similar to union in C, list of Types at the same offset in a + structure; the union has the size of the biggest Type (~ Struct with all + the fields at offset 0) + - Array: an array of items of the same type; can have a fixed size or + not (e.g. 
char[3] vs char* used as an array in C) + - BitField: similar to C bitfields, a list of + [(, ),]; creates fields that correspond to + certain bits of the field; analogous to a Union of Bits (see Bits below) + - Str: a character string, with an encoding; not directly mapped to a C + type, it is a higher level notion provided for ease of use + - Void: analogous to C void, can be a placeholder in void*-style cases. + - Self: special marker to reference a Struct inside itself (FIXME: to + remove?) + +And some less common types: + + - Bits: mask only some bits of a Num + - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a + standard C type) + +For each type, the `.pinned` property returns a MemType subclass that +allows to access the field in memory. + + +The easiest way to use the API to declare and manipulate new structures is to +subclass MemStruct and define a list of (, ): + + # FIXME: "I" => "u32" + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + +And access the fields: + + mstruct = MyStruct(jitter.vm, addr) + mstruct.num = 3 + assert mstruct.num == 3 + mstruct.other.val = addr2 + # Also works: + mstruct.other = addr2 + mstruct.other.deref = OtherStruct(jitter.vm, addr) + +MemUnion and MemBitField can also be subclassed, the `fields` field being +in the format expected by, respectively, Union and BitField. + +The `addr` argument can be omited if an allocator is set, in which case the +structure will be automatically allocated in memory: + + my_heap = miasm2.os_dep.common.heap() + # the allocator is a func(VmMngr) -> integer_address + set_allocator(my_heap) + +Note that some structures (e.g. 
MemStr or MemArray) do not have a static +size and cannot be allocated automatically. +""" + +import logging +import struct + +log = logging.getLogger(__name__) +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# ALLOCATOR is a function(vm, size) -> allocated_address +# TODO: as a MemType class attribute +ALLOCATOR = None + +# Cache for dynamically generated MemTypes +DYN_MEM_STRUCT_CACHE = {} + +def set_allocator(alloc_func): + """Set an allocator for this module; allows to instanciate statically sized + MemTypes (i.e. sizeof() is implemented) without specifying the address + (the object is allocated by @alloc_func in the vm. + + @alloc_func: func(VmMngr) -> integer_address + """ + global ALLOCATOR + ALLOCATOR = alloc_func + + +# Helpers + +def indent(s, size=4): + """Indent a string with @size spaces""" + return ' '*size + ('\n' + ' '*size).join(s.split('\n')) + + +# FIXME: copied from miasm2.os_dep.common and fixed +def get_str_ansi(vm, addr, max_char=None): + """Get a null terminated ANSI encoded string from a VmMngr. + + @vm: VmMngr instance + @max_char: max number of characters to get in memory + """ + l = 0 + tmp = addr + while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 1) != "\x00"): + tmp += 1 + l += 1 + return vm.get_mem(addr, l).decode("latin1") + + +# TODO: get_raw_str_utf16 for length calculus +def get_str_utf16(vm, addr, max_char=None): + """Get a (double) null terminated utf16 little endian encoded string from + a VmMngr. This encoding is mainly used in Windows. + + FIXME: the implementation do not work with codepoints that are encoded on + more than 2 bytes in utf16. + + @vm: VmMngr instance + @max_char: max number of bytes to get in memory + """ + l = 0 + tmp = addr + # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? 
+ while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 2) != "\x00\x00"): + tmp += 2 + l += 2 + s = vm.get_mem(addr, l) + return s.decode('utf-16le') + + +def set_str_ansi(vm, addr, s): + """Encode a string to null terminated ascii/ansi and set it in a VmMngr + memory. + + @vm: VmMngr instance + @addr: start address to serialize the string to + s: the str to serialize + """ + vm.set_mem(addr, s + "\x00") + + +def set_str_utf16(vm, addr, s): + """Same as set_str_ansi with (double) null terminated utf16 encoding.""" + s = (s + '\x00').encode('utf-16le') + vm.set_mem(addr, s) + + +# Type classes + +class Type(object): + """Base class to provide methods to describe a type, as well as how to set + and get fields from virtual mem. + + Each Type subclass is linked to a MemType subclass (e.g. Struct to + MemStruct, Ptr to MemPtr, etc.). + + When nothing is specified, MemValue is used to access the type in memory. + MemValue instances have one `.val` field, setting and getting it call + the set and get of the Type. + + Subclasses can either override _pack and _unpack, or get and set if data + serialization requires more work (see Struct implementation for an example). + + TODO: move any trace of vm and addr out of these classes? + """ + + _self_type = None + + def _pack(self, val): + """Serializes the python value @val to a raw str""" + raise NotImplementedError() + + def _unpack(self, raw_str): + """Deserializes a raw str to an object representing the python value + of this field. + """ + raise NotImplementedError() + + def set(self, vm, addr, val): + """Set a VmMngr memory from a value. 
+ + @vm: VmMngr instance + @addr: the start adress in memory to set + @val: the python value to serialize in @vm at @addr + """ + raw = self._pack(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + """Get the python value of a field from a VmMngr memory at @addr.""" + raw = vm.get_mem(addr, self.size()) + return self._unpack(raw) + + @property + def pinned(self): + """Returns a class with a (vm, addr) constructor that allows to + interact with this type in memory. + + @return: a MemType subclass. + """ + if self in DYN_MEM_STRUCT_CACHE: + return DYN_MEM_STRUCT_CACHE[self] + pinned_type = self._build_pinned_type() + DYN_MEM_STRUCT_CACHE[self] = pinned_type + return pinned_type + + def _build_pinned_type(self): + """Builds the MemType subclass allowing to interract with this type. + + Called by self.pinned when it is not in cache. + """ + pinned_base_class = self._get_pinned_base_class() + pinned_type = type("Mem%r" % self, (pinned_base_class,), + {'_type': self}) + return pinned_type + + def _get_pinned_base_class(self): + """Return the MemType subclass that maps this type in memory""" + return MemValue + + def _get_self_type(self): + """Used for the Self trick.""" + return self._self_type + + def _set_self_type(self, self_type): + """If this field refers to MemSelf/Self, replace it with @self_type + (a MemType subclass) when using it. Generally not used outside this + module. + """ + self._self_type = self_type + + def size(self): + """Return the size in bytes of the serialized version of this field""" + raise NotImplementedError() + + def __len__(self): + return self.size() + + def __neq__(self, other): + return not self == other + + +class RawStruct(Type): + """Dumb struct.pack/unpack field. Mainly used to factorize code. + + Value is a tuple corresponding to the struct @fmt passed to the constructor. 
+ """ + + def __init__(self, fmt): + self._fmt = fmt + + def _pack(self, fields): + return struct.pack(self._fmt, *fields) + + def _unpack(self, raw_str): + return struct.unpack(self._fmt, raw_str) + + def size(self): + return struct.calcsize(self._fmt) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self._fmt) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._fmt == other._fmt + + def __hash__(self): + return hash((self.__class__, self._fmt)) + + +class Num(RawStruct): + """Represents a number (integer or float). The number is encoded with + a struct-style format which must represent only one value. + + TODO: use u32, i16, etc. for format. + """ + + def _pack(self, number): + return super(Num, self)._pack([number]) + + def _unpack(self, raw_str): + upck = super(Num, self)._unpack(raw_str) + if len(upck) != 1: + raise ValueError("Num format string unpacks to multiple values, " + "should be 1") + return upck[0] + + +class Ptr(Num): + """Special case of number of which value indicates the address of a + MemType. + + Mapped to MemPtr (see its doc for more info): + + assert isinstance(mystruct.ptr, MemPtr) + mystruct.ptr = 0x4000 # Assign the Ptr numeric value + mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value + assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value + mystruct.ptr.deref # Get the pointed MemType + mystruct.ptr.deref = other # Set the pointed MemType + """ + + def __init__(self, fmt, dst_type, *type_args, **type_kwargs): + """ + @fmt: (str) Num compatible format that will be the Ptr representation + in memory + @dst_type: (MemType or Type) the MemType this Ptr points to. + If a Type is given, it is transformed into a MemType with + TheType.pinned. + *type_args, **type_kwargs: arguments to pass to the the pointed + MemType when instanciating it (e.g. for MemStr encoding or + MemArray field_type). 
+ """ + if (not isinstance(dst_type, Type) and + not (isinstance(dst_type, type) and + issubclass(dst_type, MemType)) and + not dst_type == MemSelf): + raise ValueError("dst_type of Ptr must be a MemType type, a " + "Type instance, the MemSelf marker or a class " + "name.") + super(Ptr, self).__init__(fmt) + if isinstance(dst_type, Type): + # Patch the field to propagate the MemSelf replacement + dst_type._get_self_type = lambda: self._get_self_type() + # dst_type cannot be patched here, since _get_self_type of the outer + # class has not yet been set. Patching dst_type involves calling + # dst_type.pinned, which will only return a type that does not point + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. + # In short, dst_type = dst_type.pinned is not valid here, it is done + # lazily in _fix_dst_type + self._dst_type = dst_type + self._type_args = type_args + self._type_kwargs = type_kwargs + + def _fix_dst_type(self): + if self._dst_type == MemSelf: + if self._get_self_type() is not None: + self._dst_type = self._get_self_type() + else: + raise ValueError("Unsupported usecase for MemSelf, sorry") + if isinstance(self._dst_type, Type): + self._dst_type = self._dst_type.pinned + + @property + def dst_type(self): + """Return the type (MemType subtype) this Ptr points to.""" + self._fix_dst_type() + return self._dst_type + + def set(self, vm, addr, val): + """A Ptr field can be set with a MemPtr or an int""" + if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): + self.set_val(vm, addr, val.val) + else: + super(Ptr, self).set(vm, addr, val) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_val(self, vm, addr): + """Get the numeric value of a Ptr""" + return super(Ptr, self).get(vm, addr) + + def set_val(self, vm, addr, val): + """Set the numeric value of a Ptr""" + return super(Ptr, self).set(vm, addr, val) + + def deref_get(self, vm, addr): + """Deserializes 
the data in @vm (VmMngr) at @addr to self.dst_type. + Equivalent to a pointer dereference rvalue in C. + """ + dst_addr = self.get_val(vm, addr) + return self.dst_type(vm, dst_addr, + *self._type_args, **self._type_kwargs) + + def deref_set(self, vm, addr, val): + """Serializes the @val MemType subclass instance in @vm (VmMngr) at + @addr. Equivalent to a pointer dereference assignment in C. + """ + # Sanity check + if self.dst_type != val.__class__: + log.warning("Original type was %s, overriden by value of type %s", + self._dst_type.__name__, val.__class__.__name__) + + # Actual job + dst_addr = self.get_val(vm, addr) + vm.set_mem(dst_addr, str(val)) + + def _get_pinned_base_class(self): + return MemPtr + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) + + def __eq__(self, other): + return super(Ptr, self).__eq__(other) and \ + self.dst_type == other.dst_type and \ + self._type_args == other._type_args and \ + self._type_kwargs == other._type_kwargs + + def __hash__(self): + return hash((super(Ptr, self).__hash__(), self.dst_type, + self._type_args)) + + +class Struct(Type): + """Equivalent to a C struct type. Composed of a name, and a + (, ) list describing the fields + of the struct. + + Mapped to MemStruct. + + NOTE: The `.pinned` property of Struct creates classes on the fly. If an + equivalent structure is created by subclassing MemStruct, an exception + is raised to prevent creating multiple classes designating the same type. 
+ + Example: + s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) + + Toto1 = s.pinned + + # This raises an exception, because it describes the same structure as + # Toto1 + class Toto(MemStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] + """ + + def __init__(self, name, fields): + self.name = name + # fields is immutable + self._fields = tuple(fields) + self._gen_fields() + + def _gen_fields(self): + """Precompute useful metadata on self.fields.""" + self._fields_desc = {} + offset = 0 + for name, field in self._fields: + # For reflexion + field._set_self_type(self) + self._fields_desc[name] = {"field": field, "offset": offset} + offset += field.size() + + @property + def fields(self): + return self._fields + + def set(self, vm, addr, val): + raw = str(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_field(self, vm, addr, name): + """Get a field value by @name and base structure @addr in @vm VmMngr.""" + if name not in self._fields_desc: + raise ValueError("'%s' type has no field '%s'" % (self, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + return field.get(vm, addr + offset) + + def set_field(self, vm, addr, name, val): + """Set a field value by @name and base structure @addr in @vm VmMngr. + @val is the python value corresponding to this field type. 
+ """ + if name not in self._fields_desc: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + field.set(vm, addr + offset, val) + + def size(self): + return sum(field.size() for _, field in self.fields) + + def get_offset(self, field_name): + """ + @field_name: (str, optional) the name of the field to get the + offset of + """ + if field_name not in self._fields_desc: + raise ValueError("This structure has no %s field" % field_name) + return self._fields_desc[field_name]['offset'] + + def get_field_type(self, name): + """Return the Type subclass instance describing field @name.""" + return self._fields_desc[name]['field'] + + def _get_pinned_base_class(self): + return MemStruct + + def __repr__(self): + return "struct %s" % self.name + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.fields == other.fields and \ + self.name == other.name + + def __hash__(self): + # Only hash name, not fields, because if a field is a Ptr to this + # Struct type, an infinite loop occurs + return hash((self.__class__, self.name)) + + +class Union(Struct): + """Represents a C union. + + Allows to put multiple fields at the same offset in a MemStruct, + similar to unions in C. The Union will have the size of the largest of its + fields. + + Mapped to MemUnion. 
+ + Example: + + class Example(MemStruct): + fields = [("uni", Union([ + ("f1", Num("= self.size()): + raise IndexError("Index %s out of bounds" % idx) + + def _get_pinned_base_class(self): + if self.is_sized(): + return MemSizedArray + else: + return MemArray + + def __repr__(self): + return "%r[%s]" % (self.field_type, self.array_len or "unsized") + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.field_type == other.field_type and \ + self.array_len == other.array_len + + def __hash__(self): + return hash((self.__class__, self.field_type, self.array_len)) + + +class Bits(Type): + """Helper class for BitField, not very useful on its own. Represents some + bits of a Num. + + The @backing_num is used to know how to serialize/deserialize data in vm, + but getting/setting this fields only affects bits from @bit_offset to + @bit_offset + @bits. Masking and shifting is handled by the class, the aim + is to provide a transparent way to set and get some bits of a num. 
+ """ + + def __init__(self, backing_num, bits, bit_offset): + if not isinstance(backing_num, Num): + raise ValueError("backing_num should be a Num instance") + self._num = backing_num + self._bits = bits + self._bit_offset = bit_offset + + def set(self, vm, addr, val): + val_mask = (1 << self._bits) - 1 + val_shifted = (val & val_mask) << self._bit_offset + num_size = self._num.size() * 8 + + full_num_mask = (1 << num_size) - 1 + num_mask = (~(val_mask << self._bit_offset)) & full_num_mask + + num_val = self._num.get(vm, addr) + res_val = (num_val & num_mask) | val_shifted + self._num.set(vm, addr, res_val) + + def get(self, vm, addr): + val_mask = (1 << self._bits) - 1 + num_val = self._num.get(vm, addr) + res_val = (num_val >> self._bit_offset) & val_mask + return res_val + + def size(self): + return self._num.size() + + @property + def bit_size(self): + """Number of bits read/written by this class""" + return self._bits + + @property + def bit_offset(self): + """Offset in bits (beginning at 0, the LSB) from which to read/write + bits. + """ + return self._bit_offset + + def __repr__(self): + return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, + self._bit_offset, self._bit_offset + self._bits) + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and self._bits == other._bits and \ + self._bit_offset == other._bit_offset + + def __hash__(self): + return hash((self.__class__, self._num, self._bits, self._bit_offset)) + + +class BitField(Union): + """A C-like bitfield. + + Constructed with a list [(, )] and a + @backing_num. The @backing_num is a Num instance that determines the total + size of the bitfield and the way the bits are serialized/deserialized (big + endian int, little endian short...). Can be seen (and implemented) as a + Union of Bits fields. + + Mapped to MemBitField. + + Creates fields that allow to access the bitfield fields easily. 
Example: + + class Example(MemStruct): + fields = [("bf", BitField(Num("B"), [ + ("f1", 2), + ("f2", 4), + ("f3", 1) + ]) + )] + + ex = Example(vm, addr) + ex.memset() + ex.f2 = 2 + ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated + assert ex.f1 == 3 + assert ex.f2 == 2 + assert ex.f3 == 0 # previously memset() + assert ex.bf == 3 + 2 << 2 + """ + + def __init__(self, backing_num, bit_list): + """@backing num: Num intance, @bit_list: [(name, n_bits)]""" + self._num = backing_num + fields = [] + offset = 0 + for name, bits in bit_list: + fields.append((name, Bits(self._num, bits, offset))) + offset += bits + if offset > self._num.size() * 8: + raise ValueError("sum of bit lengths is > to the backing num size") + super(BitField, self).__init__(fields) + + def set(self, vm, addr, val): + self._num.set(vm, addr, val) + + def _get_pinned_base_class(self): + return MemBitField + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and super(BitField, self).__eq__(other) + + def __hash__(self): + return hash((super(BitField, self).__hash__(), self._num)) + + def __repr__(self): + fields_repr = ', '.join("%s: %r" % (name, field.bit_size) + for name, field in self.fields) + return "%s(%s)" % (self.__class__.__name__, fields_repr) + + +class Str(Type): + """A string type that handles encoding. This type is unsized (no static + size). + + The @encoding is passed to the constructor, and is currently either null + terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware + that the utf16 implementation is a bit buggy... + + Mapped to MemStr. 
+ """ + + def __init__(self, encoding="ansi"): + # TODO: encoding as lambda + if encoding not in ["ansi", "utf16"]: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + self._enc = encoding + + def get(self, vm, addr): + """Set the string value in memory""" + if self._enc == "ansi": + get_str = get_str_ansi + elif self._enc == "utf16": + get_str = get_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + return get_str(vm, addr) + + def set(self, vm, addr, s): + """Get the string value from memory""" + if self._enc == "ansi": + set_str = set_str_ansi + elif self._enc == "utf16": + set_str = set_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + set_str(vm, addr, s) + + def size(self): + """This type is unsized.""" + raise ValueError("Str is unsized") + + @property + def enc(self): + """This Str's encoding name (as a str).""" + return self._enc + + def _get_pinned_base_class(self): + return MemStr + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self.enc) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._enc == other._enc + + def __hash__(self): + return hash((self.__class__, self._enc)) + + +class Void(Type): + """Represents the C void type. + + Mapped to MemVoid. + """ + + def _build_pinned_type(self): + return MemVoid + + def __eq__(self, other): + return self.__class__ == other.__class__ + + def __hash__(self): + return hash(self.__class__) + + +class Self(Void): + """Special marker to reference a type inside itself. + + Mapped to MemSelf. + + Example: + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ) + - instances of this class will have properties to interract with these + fields. 
+ + Example: + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + + mstruct = MyStruct(vm, addr) + + # Field assignment modifies virtual memory + mstruct.num = 3 + assert mstruct.num == 3 + memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), + 4))[0] + assert memval == mstruct.num + + # Memset sets the whole structure + mstruct.memset() + assert mstruct.num == 0 + mstruct.memset('\x11') + assert mstruct.num == 0x11111111 + + other = OtherStruct(vm, addr2) + mstruct.other = other.get_addr() + assert mstruct.other.val == other.get_addr() + assert mstruct.other.deref == other + assert mstruct.other.deref.foo == 0x1234 + + Note that: + MyStruct = Struct("MyStruct", ).pinned + is equivalent to the previous MyStruct declaration. + + See the various Type-s doc for more information. See MemStruct.gen_fields + doc for more information on how to handle recursive types and cyclic + dependencies. + """ + __metaclass__ = _MetaMemStruct + fields = None + + def get_addr(self, field_name=None): + """ + @field_name: (str, optional) the name of the field to get the + address of + """ + if field_name is not None: + offset = self._type.get_offset(field_name) + else: + offset = 0 + return self._addr + offset + + def get_field(self, name): + """Get a field value by name. + + useless most of the time since fields are accessible via self.. + """ + return self._type.get_field(self._vm, self.get_addr(), name) + + def set_field(self, name, val): + """Set a field value by name. @val is the python value corresponding to + this field type. + + useless most of the time since fields are accessible via self.. 
+ """ + return self._type.set_field(self._vm, self.get_addr(), name, val) + + def cast_field(self, field, other_type): + """In this implementation, @field is a field name""" + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr(field)) + + # Field generation method, voluntarily public to be able to gen fields + # after class definition + @classmethod + def gen_fields(cls, fields=None): + """Generate the fields of this class (so that they can be accessed with + self.) from a @fields list, as described in the class doc. + + Useful in case of a type cyclic dependency. For example, the following + is not possible in python: + + class A(MemStruct): + fields = [("b", Ptr("I", B))] + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + With gen_fields, the following is the legal equivalent: + + class A(MemStruct): + pass + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + A.gen_fields([("b", Ptr("I", B))]) + """ + if fields is not None: + if cls.fields is not None: + raise ValueError("Cannot regen fields of a class. Setting " + "cls.fields at class definition and calling " + "gen_fields are mutually exclusive.") + cls.fields = fields + + if cls._type is None: + if cls.fields is None: + raise ValueError("Cannot create a MemStruct subclass without" + " a cls._type or a cls.fields") + cls._type = cls._gen_type(cls.fields) + + if cls._type in DYN_MEM_STRUCT_CACHE: + # FIXME: Maybe a warning would be better? + raise RuntimeError("Another MemType has the same type as this " + "one. Use it instead.") + + # Register this class so that another one will not be created when + # calling cls._type.pinned + DYN_MEM_STRUCT_CACHE[cls._type] = cls + + cls._gen_attributes() + + @classmethod + def _gen_attributes(cls): + # Generate self. 
getter and setters + for name, field in cls._type.fields: + setattr(cls, name, property( + lambda self, name=name: self.get_field(name), + lambda self, val, name=name: self.set_field(name, val) + )) + + @classmethod + def _gen_type(cls, fields): + return Struct(cls.__name__, fields) + + def __repr__(self): + out = [] + for name, field in self._type.fields: + val_repr = repr(self.get_field(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + + +class MemUnion(MemStruct): + """Same as MemStruct but all fields have a 0 offset in the struct.""" + @classmethod + def _gen_type(cls, fields): + return Union(fields) + + +class MemBitField(MemUnion): + """MemUnion of Bits(...) fields.""" + @classmethod + def _gen_type(cls, fields): + return BitField(fields) + + +class MemSelf(MemStruct): + """Special Marker class for reference to current class in a Ptr or Array + (mostly Array of Ptr). See Self doc. + """ + def __repr__(self): + return self.__class__.__name__ + + +class MemVoid(MemType): + """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when + casted to another type. Allows to implement C's "void*" pattern. 
+ """ + _type = Void() + + def __repr__(self): + return self.__class__.__name__ + + +class MemPtr(MemValue): + """Mem version of a Ptr, provides two properties: + - val, to set and get the numeric value of the Ptr + - deref, to set and get the pointed type + """ + @property + def val(self): + return self._type.get_val(self._vm, self._addr) + + @val.setter + def val(self, value): + return self._type.set_val(self._vm, self._addr, value) + + @property + def deref(self): + return self._type.deref_get(self._vm, self._addr) + + @deref.setter + def deref(self, val): + return self._type.deref_set(self._vm, self._addr, val) + + def __repr__(self): + return "*%s" % hex(self.val) + + +class MemStr(MemValue): + """Implements a string representation in memory. + + The string value can be got or set (with python str/unicode) through the + self.val attribute. String encoding/decoding is handled by the class, + + This type is dynamically sized only (get_size is implemented, not sizeof). + """ + + def get_size(self): + """This get_size implementation is quite unsafe: it reads the string + underneath to determine the size, it may therefore read a lot of memory + and provoke mem faults (analogous to strlen). + """ + val = self.val + if self.get_type().enc == "ansi": + return len(val) + 1 + elif self.get_type().enc == "utf16": + # FIXME: real encoding... + return len(val) * 2 + 2 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + + def raw(self): + raw = self._vm.get_mem(self.get_addr(), self.get_size()) + return raw + + def __repr__(self): + return "%r: %r" % (self.__class__, self.val) + + +class MemArray(MemType): + """An unsized array of type @field_type (a Type subclass instance). + This class has no static or dynamic size. 
+ + It can be indexed for setting and getting elements, example: + + array = Array(Num("I")).pinned(vm, addr)) + array[2] = 5 + array[4:8] = [0, 1, 2, 3] + print array[20] + """ + + @property + def field_type(self): + """Return the Type subclass instance that represents the type of + this MemArray items. + """ + return self.get_type().field_type + + def get_addr(self, idx=0): + return self._addr + self.get_type().get_offset(idx) + + def __getitem__(self, idx): + return self.get_type().get_item(self._vm, self._addr, idx) + + def __setitem__(self, idx, item): + self.get_type().set_item(self._vm, self._addr, idx, item) + + def raw(self): + raise ValueError("%s is unsized, which prevents from getting its full " + "raw representation. Use MemSizedArray instead." % + self.__class__) + + def __repr__(self): + return "[%r, ...] [%r]" % (self[0], self.field_type) + + +class MemSizedArray(MemArray): + """A fixed size MemArray. + + This type is dynamically sized. Generate a fixed @field_type and @array_len + array which has a static size by using Array(type, size).pinned. 
+ """ + + @property + def array_len(self): + """The length, in number of elements, of this array.""" + return self.get_type().array_len + + def get_size(self): + return self.get_type().size() + + def __iter__(self): + for i in xrange(self.get_type().array_len): + yield self[i] + + def raw(self): + return self._vm.get_mem(self.get_addr(), self.get_size()) + + def __repr__(self): + item_reprs = [repr(item) for item in self] + if self.array_len > 0 and '\n' in item_reprs[0]: + items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' + else: + items = ', '.join(item_reprs) + return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) + diff --git a/test/analysis/mem.py b/test/analysis/mem.py deleted file mode 100644 index 6c7fc9e3..00000000 --- a/test/analysis/mem.py +++ /dev/null @@ -1,506 +0,0 @@ -#!/usr/bin/env python - -# miasm2.analysis.mem tests - -import struct - -from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, Num, Ptr, Str, \ - Array, RawStruct, Union, \ - BitField, Self, Void, Bits, \ - set_allocator, MemUnion, Struct -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.os_dep.common import heap - -# Two structures with some fields -class OtherStruct(MemStruct): - fields = [ - ("foo", Num("H")), - ] - -class MyStruct(MemStruct): - fields = [ - # Number field: just struct.pack fields with one value - ("num", Num("I")), - ("flags", Num("B")), - # This field is a pointer to another struct, it has a numeric - # value (mystruct.other.val) and can be dereferenced to get an - # OtherStruct instance (mystruct.other.deref) - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -jitter = Machine("x86_32").jitter("python") -jitter.init_stack() -addr = 0x1000 -size = 0x1000 -addr_str = 0x1100 -addr_str2 = 0x1200 -addr_str3 = 0x1300 -# Initialize all mem with 0xaa -jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) - - 
-# MemStruct tests -## Creation -# Use manual allocation with explicit addr for the first example -mstruct = MyStruct(jitter.vm, addr) -## Fields are read from the virtual memory -assert mstruct.num == 0xaaaaaaaa -assert mstruct.flags == 0xaa - -## Field assignment modifies virtual memory -mstruct.num = 3 -assert mstruct.num == 3 -memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] -assert memval == 3 - -## Memset sets the whole structure -mstruct.memset() -assert mstruct.num == 0 -assert mstruct.flags == 0 -assert mstruct.other.val == 0 -assert mstruct.s.val == 0 -assert mstruct.i.val == 0 -mstruct.memset('\x11') -assert mstruct.num == 0x11111111 -assert mstruct.flags == 0x11 -assert mstruct.other.val == 0x11111111 -assert mstruct.s.val == 0x11111111 -assert mstruct.i.val == 0x11111111 - - -# From now, just use heap.vm_alloc -my_heap = heap() -set_allocator(my_heap.vm_alloc) - - -# Ptr tests -## Setup for Ptr tests -# the addr field can now be omited since allocator is set -other = OtherStruct(jitter.vm) -other.foo = 0x1234 -assert other.foo == 0x1234 - -## Basic usage -mstruct.other.val = other.get_addr() -# This also works for now: -# mstruct.other = other.get_addr() -assert mstruct.other.val == other.get_addr() -assert mstruct.other.deref == other -assert mstruct.other.deref.foo == 0x1234 - -## Deref assignment -other2 = OtherStruct(jitter.vm) -other2.foo = 0xbeef -assert mstruct.other.deref != other2 -mstruct.other.deref = other2 -assert mstruct.other.deref == other2 -assert mstruct.other.deref.foo == 0xbeef -assert mstruct.other.val == other.get_addr() # Addr did not change -assert other.foo == 0xbeef # Deref assignment copies by value -assert other2.foo == 0xbeef -assert other.get_addr() != other2.get_addr() # Not the same address -assert other == other2 # But same value - -## Same stuff for Ptr to MemField -alloc_addr = my_heap.vm_alloc(jitter.vm, - mstruct.get_type().get_field_type("i") - .dst_type.sizeof()) -mstruct.i = alloc_addr 
-mstruct.i.deref.val = 8 -assert mstruct.i.deref.val == 8 -assert mstruct.i.val == alloc_addr -memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] -assert memval == 8 - - -# Str tests -## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) -memstr.val = "" -assert memstr.val == "" -assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' -memstr.val = "lala" -assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' -jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') -assert memstr.val == 'MIAMs' - -## Ptr(Str()) manipulations -mstruct.s.val = memstr.get_addr() -assert mstruct.s.val == addr_str -assert mstruct.s.deref == memstr -assert mstruct.s.deref.val == 'MIAMs' -mstruct.s.deref.val = "That's all folks!" -assert mstruct.s.deref.val == "That's all folks!" -assert memstr.val == "That's all folks!" - -## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) -memstr2.val = "That's all folks!" -assert memstr2.get_addr() != memstr.get_addr() -assert memstr2 == memstr - -## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) -memstr3.val = "That's all folks!" 
-assert memstr3.get_addr() != memstr.get_addr() -assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Mem representation is different -assert memstr3 != memstr # Encoding is different, so they are not eq -assert memstr3.val == memstr.val # But the python value is the same - - -# Array tests -# Allocate buffer manually, since memarray is unsized -alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) -memarray[0] = 0x02 -assert memarray[0] == 0x02 -assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' -memarray[2] = 0xbbbbbbbb -assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' -try: - s = str(memarray) - assert False, "Should raise" -except (NotImplementedError, ValueError): - pass -try: - s = len(memarray) - assert False, "Should raise" -except (NotImplementedError, ValueError): - pass - -## Slice assignment -memarray[2:4] = [3, 3] -assert memarray[2] == 3 -assert memarray[3] == 3 -assert memarray[2:4] == [3, 3] -try: - memarray[2:4] = [3, 3, 3] - assert False, "Should raise, mismatched sizes" -except ValueError: - pass - - -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 -memsarray.memset('\xcc') -assert memsarray[0] == 0xcccccccc -assert len(memsarray) == 10 * 4 -assert str(memsarray) == '\xcc' * (4 * 10) -for val in memsarray: - assert val == 0xcccccccc -assert list(memsarray) == [0xcccccccc] * 10 -memsarray[0] = 2 -assert memsarray[0] == 2 -assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) - - -# Atypical fields (RawStruct and Array) -class MyStruct2(MemStruct): - fields = [ - ("s1", RawStruct("=BI")), - ("s2", Array(Num("B"), 10)), - ] - -ms2 = MyStruct2(jitter.vm) -ms2.memset('\xaa') -assert len(ms2) == 15 
- -## RawStruct -assert len(ms2.s1) == 2 -assert ms2.s1[0] == 0xaa -assert ms2.s1[1] == 0xaaaaaaaa - -## Array -### Basic checks -assert len(ms2.s2) == 10 -for val in ms2.s2: - assert val == 0xaa -assert ms2.s2[0] == 0xaa -assert ms2.s2[9] == 0xaa - -### Subscript assignment -ms2.s2[3] = 2 -assert ms2.s2[3] == 2 - -### Field assignment (list) -ms2.s2 = [1] * 10 -for val in ms2.s2: - assert val == 1 - -### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) -jitter.vm.set_mem(array2.get_addr(), '\x02'*10) -for val in array2: - assert val == 2 -ms2.s2 = array2 -for val in ms2.s2: - assert val == 2 - - -# Inlining a MemType tests -class InStruct(MemStruct): - fields = [ - ("foo", Num("B")), - ("bar", Num("B")), - ] - -class ContStruct(MemStruct): - fields = [ - ("one", Num("B")), - ("instruct", InStruct.get_type()), - ("last", Num("B")), - ] - -cont = ContStruct(jitter.vm) -cont.memset() -assert len(cont) == 4 -assert len(cont.instruct) == 2 -assert cont.one == 0 -assert cont.last == 0 -assert cont.instruct.foo == 0 -assert cont.instruct.bar == 0 -cont.memset('\x11') -assert cont.one == 0x11 -assert cont.last == 0x11 -assert cont.instruct.foo == 0x11 -assert cont.instruct.bar == 0x11 - -cont.one = 0x01 -cont.instruct.foo = 0x02 -cont.instruct.bar = 0x03 -cont.last = 0x04 -assert cont.one == 0x01 -assert cont.instruct.foo == 0x02 -assert cont.instruct.bar == 0x03 -assert cont.last == 0x04 -assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' - - -# Union test -class UniStruct(MemStruct): - fields = [ - ("one", Num("B")), - ("union", Union([ - ("instruct", InStruct.get_type()), - ("i", Num(">I")), - ])), - ("last", Num("B")), - ] - -uni = UniStruct(jitter.vm) -jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) -assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 -assert uni.one == 0x00 -assert uni.union.instruct.foo == 0x01 -assert uni.union.instruct.bar == 0x02 -assert uni.union.i == 
0x01020304 -assert uni.last == 0x05 -uni.union.instruct.foo = 0x02 -assert uni.union.i == 0x02020304 -uni.union.i = 0x11223344 -assert uni.union.instruct.foo == 0x11 -assert uni.union.instruct.bar == 0x22 - - -# BitField test -class BitStruct(MemUnion): - fields = [ - ("flags_num", Num("H")), - ("flags", BitField(Num("H"), [ - ("f1_1", 1), - ("f2_5", 5), - ("f3_8", 8), - ("f4_1", 1), - ])), - ] - -bit = BitStruct(jitter.vm) -bit.memset() -assert bit.flags_num == 0 -assert bit.flags.f1_1 == 0 -assert bit.flags.f2_5 == 0 -assert bit.flags.f3_8 == 0 -assert bit.flags.f4_1 == 0 -bit.flags.f1_1 = 1 -bit.flags.f2_5 = 0b10101 -bit.flags.f3_8 = 0b10000001 -assert bit.flags_num == 0b0010000001101011 -assert bit.flags.f1_1 == 1 -assert bit.flags.f2_5 == 0b10101 -assert bit.flags.f3_8 == 0b10000001 -assert bit.flags.f4_1 == 0 -bit.flags_num = 0b1101010101011100 -assert bit.flags.f1_1 == 0 -assert bit.flags.f2_5 == 0b01110 -assert bit.flags.f3_8 == 0b01010101 -assert bit.flags.f4_1 == 1 - - -# Unhealthy ideas -class UnhealthyIdeas(MemStruct): - fields = [ - ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), - ("apstr", Array(Ptr("I", Str()), 10)), - ("pself", Ptr("I", Self())), - ("apself", Array(Ptr("I", Self()), 2)), - ("ppself", Ptr("I", Ptr("I", Self()))), - ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), - ] - -p_size = Ptr("I", Void()).size() - -ideas = UnhealthyIdeas(jitter.vm) -ideas.memset() -ideas.pself = ideas.get_addr() -assert ideas == ideas.pself.deref - -ideas.apself[0] = ideas.get_addr() -assert ideas.apself[0].deref == ideas -ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.apself[1].deref = ideas -assert ideas.apself[1] != ideas.get_addr() -assert ideas.apself[1].deref == ideas - -ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.ppself.deref.val = ideas.get_addr() -assert ideas.ppself.deref.val == ideas.get_addr() -assert ideas.ppself.deref.deref == ideas - -ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, 
UnhealthyIdeas.sizeof()) -ideas.ppself.deref.deref = ideas -assert ideas.ppself.deref.val != ideas.get_addr() -assert ideas.ppself.deref.deref == ideas - -ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) -ideas.pppself.deref.deref.val = ideas.get_addr() -assert ideas.pppself.deref.deref.deref == ideas - - -# Circular dependencies -class A(MemStruct): - pass - -class B(MemStruct): - fields = [("a", Ptr("I", A)),] - -# Gen A's fields after declaration -A.gen_fields([("b", Ptr("I", B)),]) - -a = A(jitter.vm) -b = B(jitter.vm) -a.b.val = b.get_addr() -b.a.val = a.get_addr() -assert a.b.deref == b -assert b.a.deref == a - - -# Cast tests -# MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned -dword = MemInt(jitter.vm) -dword.val = 0x12345678 -assert isinstance(dword.cast(MemShort), MemShort) -assert dword.cast(MemShort).val == 0x5678 - -# Field cast -ms2.s2[0] = 0x34 -ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", MemShort).val == 0x1234 - -# Other method -assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 - -# Manual cast inside an Array -ms2.s2[4] = 0xcd -ms2.s2[5] = 0xab -assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd - -# void* style cast -MemPtrVoid = Ptr("I", Void()).pinned -p = MemPtrVoid(jitter.vm) -p.val = mstruct.get_addr() -assert p.deref.cast(MyStruct) == mstruct -assert p.cast(Ptr("I", MyStruct)).deref == mstruct - -# Field equality tests -assert RawStruct("IH") == RawStruct("IH") -assert RawStruct("I") != RawStruct("IH") -assert Num("I") == Num("I") -assert Num(">I") != Num("I", MyStruct) != Ptr("I")), + ])), + ("last", Num("B")), + ] + +uni = UniStruct(jitter.vm) +jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) +assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 +assert uni.one == 0x00 +assert uni.union.instruct.foo == 0x01 +assert uni.union.instruct.bar == 0x02 +assert uni.union.i == 0x01020304 +assert uni.last == 
0x05 +uni.union.instruct.foo = 0x02 +assert uni.union.i == 0x02020304 +uni.union.i = 0x11223344 +assert uni.union.instruct.foo == 0x11 +assert uni.union.instruct.bar == 0x22 + + +# BitField test +class BitStruct(MemUnion): + fields = [ + ("flags_num", Num("H")), + ("flags", BitField(Num("H"), [ + ("f1_1", 1), + ("f2_5", 5), + ("f3_8", 8), + ("f4_1", 1), + ])), + ] + +bit = BitStruct(jitter.vm) +bit.memset() +assert bit.flags_num == 0 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0 +assert bit.flags.f3_8 == 0 +assert bit.flags.f4_1 == 0 +bit.flags.f1_1 = 1 +bit.flags.f2_5 = 0b10101 +bit.flags.f3_8 = 0b10000001 +assert bit.flags_num == 0b0010000001101011 +assert bit.flags.f1_1 == 1 +assert bit.flags.f2_5 == 0b10101 +assert bit.flags.f3_8 == 0b10000001 +assert bit.flags.f4_1 == 0 +bit.flags_num = 0b1101010101011100 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0b01110 +assert bit.flags.f3_8 == 0b01010101 +assert bit.flags.f4_1 == 1 + + +# Unhealthy ideas +class UnhealthyIdeas(MemStruct): + fields = [ + ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), + ("apstr", Array(Ptr("I", Str()), 10)), + ("pself", Ptr("I", Self())), + ("apself", Array(Ptr("I", Self()), 2)), + ("ppself", Ptr("I", Ptr("I", Self()))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), + ] + +p_size = Ptr("I", Void()).size() + +ideas = UnhealthyIdeas(jitter.vm) +ideas.memset() +ideas.pself = ideas.get_addr() +assert ideas == ideas.pself.deref + +ideas.apself[0] = ideas.get_addr() +assert ideas.apself[0].deref == ideas +ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.apself[1].deref = ideas +assert ideas.apself[1] != ideas.get_addr() +assert ideas.apself[1].deref == ideas + +ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.ppself.deref.val = ideas.get_addr() +assert ideas.ppself.deref.val == ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) 
+ideas.ppself.deref.deref = ideas +assert ideas.ppself.deref.val != ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.deref.val = ideas.get_addr() +assert ideas.pppself.deref.deref.deref == ideas + + +# Circular dependencies +class A(MemStruct): + pass + +class B(MemStruct): + fields = [("a", Ptr("I", A)),] + +# Gen A's fields after declaration +A.gen_fields([("b", Ptr("I", B)),]) + +a = A(jitter.vm) +b = B(jitter.vm) +a.b.val = b.get_addr() +b.a.val = a.get_addr() +assert a.b.deref == b +assert b.a.deref == a + + +# Cast tests +# MemStruct cast +MemInt = Num("I").pinned +MemShort = Num("H").pinned +dword = MemInt(jitter.vm) +dword.val = 0x12345678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).val == 0x5678 + +# Field cast +ms2.s2[0] = 0x34 +ms2.s2[1] = 0x12 +assert ms2.cast_field("s2", MemShort).val == 0x1234 + +# Other method +assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 + +# Manual cast inside an Array +ms2.s2[4] = 0xcd +ms2.s2[5] = 0xab +assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd + +# void* style cast +MemPtrVoid = Ptr("I", Void()).pinned +p = MemPtrVoid(jitter.vm) +p.val = mstruct.get_addr() +assert p.deref.cast(MyStruct) == mstruct +assert p.cast(Ptr("I", MyStruct)).deref == mstruct + +# Field equality tests +assert RawStruct("IH") == RawStruct("IH") +assert RawStruct("I") != RawStruct("IH") +assert Num("I") == Num("I") +assert Num(">I") != Num("I", MyStruct) != Ptr(" Date: Fri, 4 Dec 2015 10:16:37 +0100 Subject: MemStruct/Types: pinned renamed to lval --- example/jitter/types.py | 2 ++ miasm2/core/types.py | 79 ++++++++++++++++++++++++++++++++++--------------- test/core/types.py | 44 +++++++++++++-------------- 3 files changed, 79 insertions(+), 46 deletions(-) (limited to 'example') diff --git a/example/jitter/types.py b/example/jitter/types.py 
index 6d8543b4..f4a7ddb4 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -229,6 +229,8 @@ print "See that the original array has been modified:" print repr(data) print +# TODO: type manipulation examples + print "See test/core/types.py and the miasm2.core.types module doc for " print "more information." diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 3c8d5b8b..d6bc3cf5 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -1,5 +1,34 @@ -"""This module provides classes to manipulate C structures backed by a VmMngr -object (a miasm sandbox virtual memory). +"""This module provides classes to manipulate pure C types as well as their +representation in memory. A typical usecase is to use this module to +easily manipylate structures backed by a VmMngr object (a miasm sandbox virtual +memory): + + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ): - # FIXME: "I" => "u32" class MyStruct(MemStruct): fields = [ # Scalar field: just struct.pack field with one value @@ -210,10 +238,13 @@ class Type(object): return self._unpack(raw) @property - def pinned(self): + def lval(self): """Returns a class with a (vm, addr) constructor that allows to interact with this type in memory. + In compilation terms, it returns a class allowing to instanciate an + lvalue of this type. + @return: a MemType subclass. """ if self in DYN_MEM_STRUCT_CACHE: @@ -225,7 +256,7 @@ class Type(object): def _build_pinned_type(self): """Builds the MemType subclass allowing to interract with this type. - Called by self.pinned when it is not in cache. + Called by self.lval when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() pinned_type = type("Mem%r" % self, (pinned_base_class,), @@ -324,7 +355,7 @@ class Ptr(Num): in memory @dst_type: (MemType or Type) the MemType this Ptr points to. If a Type is given, it is transformed into a MemType with - TheType.pinned. + TheType.lval. 
*type_args, **type_kwargs: arguments to pass to the the pointed MemType when instanciating it (e.g. for MemStr encoding or MemArray field_type). @@ -342,10 +373,10 @@ class Ptr(Num): dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point + # dst_type.lval, which will only return a type that does not point # on MemSelf but on the right class only when _get_self_type of the # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.pinned is not valid here, it is done + # In short, dst_type = dst_type.lval is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args @@ -358,7 +389,7 @@ class Ptr(Num): else: raise ValueError("Unsupported usecase for MemSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned + self._dst_type = self._dst_type.lval @property def dst_type(self): @@ -374,7 +405,7 @@ class Ptr(Num): super(Ptr, self).set(vm, addr, val) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_val(self, vm, addr): """Get the numeric value of a Ptr""" @@ -429,14 +460,14 @@ class Struct(Type): Mapped to MemStruct. - NOTE: The `.pinned` property of Struct creates classes on the fly. If an + NOTE: The `.lval` property of Struct creates classes on the fly. If an equivalent structure is created by subclassing MemStruct, an exception is raised to prevent creating multiple classes designating the same type. 
Example: s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - Toto1 = s.pinned + Toto1 = s.lval # This raises an exception, because it describes the same structure as # Toto1 @@ -469,7 +500,7 @@ class Struct(Type): vm.set_mem(addr, raw) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_field(self, vm, addr, name): """Get a field value by @name and base structure @addr in @vm VmMngr.""" @@ -618,7 +649,7 @@ class Array(Type): "Assignment only implemented for list and MemSizedArray") def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def size(self): if self.is_sized(): @@ -949,7 +980,7 @@ class MemType(object): virtual memory. Globally, MemTypes are not meant to be used directly: specialized - subclasses are generated by Type(...).pinned and should be used instead. + subclasses are generated by Type(...).lval and should be used instead. The main exception is MemStruct, which you may want to subclass yourself for syntactic ease. """ @@ -1022,11 +1053,11 @@ class MemType(object): """Cast this MemType to another MemType (same address, same vm, but different type). Return the casted MemType. - @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr()) def cast_field(self, field, other_type, *type_args, **type_kwargs): @@ -1035,7 +1066,7 @@ class MemType(object): @field: field specification, for example its name for a struct, or an index in an array. See the subclass doc. 
- @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ raise NotImplementedError("Abstract") @@ -1127,7 +1158,7 @@ class MemStruct(MemType): assert mstruct.other.deref.foo == 0x1234 Note that: - MyStruct = Struct("MyStruct", ).pinned + MyStruct = Struct("MyStruct", ).lval is equivalent to the previous MyStruct declaration. See the various Type-s doc for more information. See MemStruct.gen_fields @@ -1166,7 +1197,7 @@ class MemStruct(MemType): def cast_field(self, field, other_type): """In this implementation, @field is a field name""" if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr(field)) # Field generation method, voluntarily public to be able to gen fields @@ -1214,7 +1245,7 @@ class MemStruct(MemType): "one. Use it instead.") # Register this class so that another one will not be created when - # calling cls._type.pinned + # calling cls._type.lval DYN_MEM_STRUCT_CACHE[cls._type] = cls cls._gen_attributes() @@ -1336,7 +1367,7 @@ class MemArray(MemType): It can be indexed for setting and getting elements, example: - array = Array(Num("I")).pinned(vm, addr)) + array = Array(Num("I")).lval(vm, addr)) array[2] = 5 array[4:8] = [0, 1, 2, 3] print array[20] @@ -1371,7 +1402,7 @@ class MemSizedArray(MemArray): """A fixed size MemArray. This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. + array which has a static size by using Array(type, size).lval. 
""" @property diff --git a/test/core/types.py b/test/core/types.py index db72449c..f1ff706b 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -119,7 +119,7 @@ assert memval == 8 # Str tests ## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) +memstr = Str().lval(jitter.vm, addr_str) memstr.val = "" assert memstr.val == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' @@ -138,13 +138,13 @@ assert mstruct.s.deref.val == "That's all folks!" assert memstr.val == "That's all folks!" ## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) +memstr2 = Str().lval(jitter.vm, addr_str2) memstr2.val = "That's all folks!" assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) +memstr3 = Str("utf16").lval(jitter.vm, addr_str3) memstr3.val = "That's all folks!" assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different @@ -156,7 +156,7 @@ assert memstr3.val == memstr.val # But the python value is the same # Array tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) +memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -188,8 +188,8 @@ except ValueError: pass -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types +memsarray = Array(Num("I"), 10).lval(jitter.vm) +# And Array(type, size).lval generates statically sized types assert memsarray.sizeof() == Num("I").size() * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc @@ -237,7 +237,7 @@ for val in ms2.s2: assert val == 1 ### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) +array2 = Array(Num("B"), 
10).lval(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -406,8 +406,8 @@ assert b.a.deref == a # Cast tests # MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned +MemInt = Num("I").lval +MemShort = Num("H").lval dword = MemInt(jitter.vm) dword.val = 0x12345678 assert isinstance(dword.cast(MemShort), MemShort) @@ -427,7 +427,7 @@ ms2.s2[5] = 0xab assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast -MemPtrVoid = Ptr("I", Void()).pinned +MemPtrVoid = Ptr("I", Void()).lval p = MemPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct @@ -474,17 +474,17 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick MemField.pinned/MemField hash test -assert Num("f").pinned(jitter.vm, addr) == Num("f").pinned(jitter.vm, addr) +# Quick MemField.lval/MemField hash test +assert Num("f").lval(jitter.vm, addr) == Num("f").lval(jitter.vm, addr) # Types are cached -assert Num("f").pinned == Num("f").pinned -assert Num("d").pinned != Num("f").pinned -assert Union([("f1", Num("I")), ("f2", Num("H"))]).pinned == \ - Union([("f1", Num("I")), ("f2", Num("H"))]).pinned -assert Array(Num("B")).pinned == Array(Num("B")).pinned -assert Array(Num("I")).pinned != Array(Num("B")).pinned -assert Array(Num("B"), 20).pinned == Array(Num("B"), 20).pinned -assert Array(Num("B"), 19).pinned != Array(Num("B"), 20).pinned +assert Num("f").lval == Num("f").lval +assert Num("d").lval != Num("f").lval +assert Union([("f1", Num("I")), ("f2", Num("H"))]).lval == \ + Union([("f1", Num("I")), ("f2", Num("H"))]).lval +assert Array(Num("B")).lval == Array(Num("B")).lval +assert Array(Num("I")).lval != Array(Num("B")).lval +assert Array(Num("B"), 20).lval == Array(Num("B"), 20).lval +assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval # Repr tests @@ -496,8 +496,8 @@ print repr(cont), '\n' print repr(uni), 
'\n' print repr(bit), '\n' print repr(ideas), '\n' -print repr(Array(MyStruct2.get_type(), 2).pinned(jitter.vm, addr)), '\n' -print repr(Num("f").pinned(jitter.vm, addr)), '\n' +print repr(Array(MyStruct2.get_type(), 2).lval(jitter.vm, addr)), '\n' +print repr(Num("f").lval(jitter.vm, addr)), '\n' print repr(memarray) print repr(memsarray) print repr(memstr) -- cgit 1.4.1 From bd85479c0c7aba2d9ec9aeb2e6b2c4bb4b54d3e0 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Fri, 4 Dec 2015 11:05:51 +0100 Subject: MemStruct/Types: example on type manipulations --- example/jitter/types.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'example') diff --git a/example/jitter/types.py b/example/jitter/types.py index f4a7ddb4..6c0b59af 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -229,7 +229,29 @@ print "See that the original array has been modified:" print repr(data) print -# TODO: type manipulation examples +# Some type manipulation examples, for example let's construct an argv for +# a program: +# Let's say that we have two arguments, +1 for the program name and +1 for the +# final null ptr in argv, the array has 4 elements: +argv_t = Array(Ptr(" Date: Sat, 5 Dec 2015 14:44:23 +0100 Subject: Types: Type size is now a property --- example/jitter/types.py | 2 +- miasm2/core/types.py | 35 +++++++++++++++++++++-------------- test/core/types.py | 10 +++++----- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'example') diff --git a/example/jitter/types.py b/example/jitter/types.py index 6c0b59af..e714372c 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -171,7 +171,7 @@ link.push(DataArray(vm)) assert link.size == 3 # If you get it directly from the VM, it is updated as well raw_size = vm.get_mem(link.get_addr("size"), link.get_type() - .get_field_type("size").size()) + .get_field_type("size").size) assert raw_size == '\x03\x00\x00\x00' print "The linked list just built:" 
diff --git a/miasm2/core/types.py b/miasm2/core/types.py index bf8f7823..03d23a5c 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -262,7 +262,7 @@ class Type(object): def get(self, vm, addr): """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) + raw = vm.get_mem(addr, self.size) return self._unpack(raw) @property @@ -306,12 +306,13 @@ class Type(object): """ self._self_type = self_type + @property def size(self): """Return the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): - return self.size() + return self.size def __neq__(self, other): return not self == other @@ -332,6 +333,7 @@ class RawStruct(Type): def _unpack(self, raw_str): return struct.unpack(self._fmt, raw_str) + @property def size(self): return struct.calcsize(self._fmt) @@ -517,7 +519,7 @@ class Struct(Type): # For reflexion field._set_self_type(self) self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() + offset += field.size @property def fields(self): @@ -549,8 +551,9 @@ class Struct(Type): offset = self.get_offset(name) field.set(vm, addr + offset, val) + @property def size(self): - return sum(field.size() for _, field in self.fields) + return sum(field.size for _, field in self.fields) def get_offset(self, field_name): """ @@ -609,8 +612,9 @@ class Union(Struct): """@field_list: a [(name, field)] list, see the class doc""" super(Union, self).__init__("union", field_list) + @property def size(self): - return max(field.size() for _, field in self.fields) + return max(field.size for _, field in self.fields) def get_offset(self, field_name): return 0 @@ -658,7 +662,7 @@ class Array(Type): def set(self, vm, addr, val): # MemSizedArray assignment if isinstance(val, MemSizedArray): - if val.array_len != self.array_len or len(val) != self.size(): + if val.array_len != self.array_len or len(val) != self.size: raise ValueError("Size mismatch in 
MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) @@ -670,7 +674,7 @@ class Array(Type): offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) - offset += self.field_type.size() + offset += self.field_type.size else: raise RuntimeError( @@ -679,6 +683,7 @@ class Array(Type): def get(self, vm, addr): return self.lval(vm, addr) + @property def size(self): if self.is_sized(): return self.get_offset(self.array_len) @@ -688,7 +693,7 @@ class Array(Type): def get_offset(self, idx): """Returns the offset of the item at index @idx.""" - return self.field_type.size() * idx + return self.field_type.size * idx def get_item(self, vm, addr, idx): """Get the item(s) at index @idx. @@ -745,7 +750,7 @@ class Array(Type): def _check_bounds(self, idx): if not isinstance(idx, (int, long)): raise ValueError("index must be an int or a long") - if idx < 0 or (self.is_sized() and idx >= self.size()): + if idx < 0 or (self.is_sized() and idx >= self.size): raise IndexError("Index %s out of bounds" % idx) def _get_pinned_base_class(self): @@ -786,7 +791,7 @@ class Bits(Type): def set(self, vm, addr, val): val_mask = (1 << self._bits) - 1 val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 + num_size = self._num.size * 8 full_num_mask = (1 << num_size) - 1 num_mask = (~(val_mask << self._bit_offset)) & full_num_mask @@ -801,8 +806,9 @@ class Bits(Type): res_val = (num_val >> self._bit_offset) & val_mask return res_val + @property def size(self): - return self._num.size() + return self._num.size @property def bit_size(self): @@ -868,7 +874,7 @@ class BitField(Union): for name, bits in bit_list: fields.append((name, Bits(self._num, bits, offset))) offset += bits - if offset > self._num.size() * 8: + if offset > self._num.size * 8: raise ValueError("sum of bit lengths is > to the backing num size") super(BitField, self).__init__(fields) @@ -968,6 +974,7 @@ class Str(Type): set_str = self.encodings[self.enc][1] set_str(vm, addr, 
s) + @property def size(self): """This type is unsized.""" raise ValueError("Str is unsized") @@ -1130,7 +1137,7 @@ class MemType(object): """Return the static size of this type. By default, it is the size of the underlying Type. """ - return cls._type.size() + return cls._type.size def get_size(self): """Return the dynamic size of this structure (e.g. the size of an @@ -1518,7 +1525,7 @@ class MemSizedArray(MemArray): return self.get_type().array_len def get_size(self): - return self.get_type().size() + return self.get_type().size def __iter__(self): for i in xrange(self.get_type().array_len): diff --git a/test/core/types.py b/test/core/types.py index f1ff706b..de6034ef 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -160,11 +160,11 @@ memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' + Num("I").size) == '\x02\x00\x00\x00' memarray[2] = 0xbbbbbbbb assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size, + Num("I").size) == '\xbb\xbb\xbb\xbb' try: s = str(memarray) assert False, "Should raise" @@ -190,7 +190,7 @@ except ValueError: memsarray = Array(Num("I"), 10).lval(jitter.vm) # And Array(type, size).lval generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 +assert memsarray.sizeof() == Num("I").size * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc assert len(memsarray) == 10 * 4 @@ -356,7 +356,7 @@ class UnhealthyIdeas(MemStruct): ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), ] -p_size = Ptr("I", Void()).size() +p_size = Ptr("I", Void()).size ideas = UnhealthyIdeas(jitter.vm) ideas.memset() -- cgit 1.4.1 From b421c5059f2a7bfd47b5bdf4c3488a3e8bda5b40 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 18 Jan 
2016 14:04:20 +0100 Subject: Types: misleading comment correction in example --- example/jitter/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'example') diff --git a/example/jitter/types.py b/example/jitter/types.py index e714372c..c37c3b84 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -239,7 +239,7 @@ print "3 arguments argv type:", argv_t # alloc argv somewhere argv = argv_t.lval(vm) -# Auto alloc with a buffer type +# Auto alloc with the MemStr.from_str helper MemStrAnsi = Str().lval argv[0].val = MemStrAnsi.from_str(vm, "./my-program").get_addr() argv[1].val = MemStrAnsi.from_str(vm, "arg1").get_addr() -- cgit 1.4.1