diff options
Diffstat (limited to '')
| -rw-r--r-- | miasm2/analysis/mem.py | 290 | ||||
| -rw-r--r-- | test/analysis/mem.py | 4 |
2 files changed, 269 insertions, 25 deletions
diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 4ba3f842..be14e013 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -1,3 +1,81 @@ +"""This module provides classes to manipulate C structures backed by a VmMngr +object (a miasm VM virtual memory). + +The main idea is to declare the fields of the structure in the class: + + # FIXME: "I" => "u32" + class MyStruct(MemStruct): + fields = [ + # Integer field: just struct.pack fields with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields are Num, but they can also be dereferenced + # (self.deref_<field>). Deref can be read and set. + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", MemStr)), + ("i", Ptr("I", Num("I"))), + ] + +And access the fields: + + mstruct = MyStruct(jitter.vm, addr) + mstruct.num = 3 + assert mstruct.num == 3 + mstruct.other = addr2 + mstruct.deref_other = OtherStruct(jitter.vm, addr) + +The `addr` argument can be omitted if an allocator is set, in which case the +structure will be automatically allocated in memory: + + my_heap = miasm2.os_dep.common.heap() + set_allocator(my_heap) + +Note that some structures (e.g. MemStr or MemArray) do not have a static size +and cannot be allocated automatically. + + +As you saw previously, to use this module, you just have to inherit from +MemStruct and define a list of (<field_name>, <field_definition>). 
Available +MemField classes are: + + - Num: for number (float or int) handling + - Struct: abstraction over a simple struct pack/unpack + - Ptr: a pointer to another MemStruct instance + - Inline: include another MemStruct as a field (equivalent to having a + struct field into another struct in C) + - Array: a fixed size array of MemFields (points) + - Union: similar to `union` in C, list of MemFields at the same offset in a + structure; the union has the size of the biggest MemField + - BitField: similar to C bitfields, a list of + [(<field_name>, <number_of_bits>)]; creates fields that correspond to + certain bits of the field + +A MemField always has a fixed size in memory. + + +Some special memory structures are already implemented; they all are subclasses +of MemStruct with a custom implementation: + + - MemSelf: this class is just a special marker to reference a MemStruct + subclass inside itself. Works with Ptr and Array (e.g. Ptr(_, MemSelf) + for a pointer to the same type as the class that uses this kind of field) + - MemVoid: empty MemStruct, placeholder to be cast to an implemented + MemStruct subclass + - MemStr: represents a string in memory; the encoding can be passed to the + constructor (null terminated ascii/ansi or null terminated utf16) + - MemArray: an unsized array of MemField; unsized here means that there is + no defined size for this array, equivalent to an int* or char*-style table + in C. It cannot be allocated automatically, since it has no known size + - MemSizedArray: a sized MemArray, can be automatically allocated in memory + and allows more operations than MemArray + - mem: a function that dynamically generates a MemStruct subclass from a + MemField. This class has only one field named "value". + +A MemStruct does not always have a static size (cls.sizeof()) nor a dynamic size +(self.get_size()). 
+""" + import logging import struct @@ -11,6 +89,13 @@ log.setLevel(logging.WARN) allocator = None def set_allocator(alloc_func): + """Sets an allocator for this module; allows to instantiate statically sized + MemStructs (i.e. sizeof() is implemented) without specifying the address + (the object is allocated by @alloc_func in the vm). + + Args: + alloc_func: func(VmMngr) -> integer_address + """ global allocator allocator = alloc_func @@ -18,11 +103,18 @@ def set_allocator(alloc_func): # Helpers def indent(s, size=4): + """Indents a string with @size spaces""" return ' '*size + ('\n' + ' '*size).join(s.split('\n')) - + # FIXME: copied from miasm2.os_dep.common and fixed def get_str_ansi(vm, addr, max_char=None): + """Gets a null terminated ANSI encoded string from a VmMngr. + + Args: + vm: VmMngr instance + max_char: max number of characters to get in memory + """ l = 0 tmp = addr while ((max_char is None or l < max_char) and @@ -34,6 +126,16 @@ def get_str_ansi(vm, addr, max_char=None): # TODO: get_raw_str_utf16 for length calculus def get_str_utf16(vm, addr, max_char=None): + """Gets a (double) null terminated utf16 little endian encoded string from + a VmMngr. This encoding is mainly used in Windows. + + FIXME: the implementation does not work with codepoints that are encoded on + more than 2 bytes in utf16. + + Args: + vm: VmMngr instance + max_char: max number of bytes to get in memory + """ l = 0 tmp = addr # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? @@ -46,16 +148,26 @@ def get_str_utf16(vm, addr, max_char=None): def set_str_ansi(vm, addr, s): + """Encodes a string to null terminated ascii/ansi and sets it in a VmMngr + memory. 
+ + Args: + vm: VmMngr instance + addr: start address to serialize the string to + s: the str to serialize + """ vm.set_mem(addr, s + "\x00") def set_str_utf16(vm, addr, s): + """Same as set_str_ansi with (double) null terminated utf16 encoding.""" s = (s + '\x00').encode('utf-16le') vm.set_mem(addr, s) # MemField to MemStruct helper +# TODO: cache generated types def mem(field): """Generates a MemStruct subclass from a field. The field's value can be accessed through self.value or self.deref_value if field is a Ptr. @@ -69,33 +181,51 @@ def mem(field): # MemField classes class MemField(object): - """Base class to provide methods to set and get fields from virtual mem.""" + """Base class to provide methods to set and get fields from virtual mem. + + Subclasses can either override _pack and _unpack, or get and set if data + serialization requires more work (see Inline implementation for an example). + """ _self_type = None def _pack(self, val): - """Returns a packed str""" + """Serializes the python value @val to a raw str""" raise NotImplementedError() - def _unpack(self, packed_str): - """Returns an object.""" + def _unpack(self, raw_str): + """Deserializes a raw str to an object representing the python value + of this field. + """ raise NotImplementedError() def set(self, vm, addr, val): + """Set a VmMngr memory from a value. + + Args: + vm: VmMngr instance + addr: the start address in memory to set + val: the python value to serialize in @vm at @addr + """ raw = self._pack(val) vm.set_mem(addr, raw) def get(self, vm, addr): + """Get the python value of a field from a VmMngr memory at @addr.""" raw = vm.get_mem(addr, self.size()) return self._unpack(raw) - def get_self_type(self): + def _get_self_type(self): return self._self_type - def set_self_type(self, self_type): + def _set_self_type(self, self_type): + """If this field refers to MemSelf, replace it with @self_type (a + MemStruct subclass) when using it. Generally not used outside the lib. 
+ """ self._self_type = self_type def size(self): + """Returns the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): @@ -103,6 +233,10 @@ class MemField(object): class Struct(MemField): + """Dumb struct.pack/unpack field. Mainly used to factorize code. + + Value is a tuple corresponding to the struct @fmt passed to the constructor. + """ def __init__(self, fmt): self._fmt = fmt @@ -110,8 +244,8 @@ class Struct(MemField): def _pack(self, fields): return struct.pack(self._fmt, *fields) - def _unpack(self, packed_str): - return struct.unpack(self._fmt, packed_str) + def _unpack(self, raw_str): + return struct.unpack(self._fmt, raw_str) def size(self): return struct.calcsize(self._fmt) @@ -121,12 +255,17 @@ class Struct(MemField): class Num(Struct): + """Represents a number (integer or float). The number is encoded with + a struct-style format which must represent only one value. + + TODO: use u32, i16, etc. for format. + """ def _pack(self, number): return super(Num, self)._pack([number]) - def _unpack(self, packed_str): - upck = super(Num, self)._unpack(packed_str) + def _unpack(self, raw_str): + upck = super(Num, self)._unpack(raw_str) if len(upck) > 1: raise ValueError("Num format string unpacks to multiple values, " "should be 1") @@ -134,6 +273,10 @@ class Num(Struct): class Ptr(Num): + """Special case of number of which value indicates the address of a + MemStruct. Provides deref_<field> as well as <field> when used, to set and + get the pointed MemStruct. 
+ """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): if not isinstance(dst_type, MemField) and\ @@ -146,31 +289,35 @@ class Ptr(Num): super(Ptr, self).__init__(fmt) if isinstance(dst_type, MemField): # Patch the field to propagate the MemSelf replacement - dst_type.get_self_type = lambda: self.get_self_type() + dst_type._get_self_type = lambda: self._get_self_type() dst_type = mem(dst_type) self._dst_type = dst_type self._type_args = type_args self._type_kwargs = type_kwargs - def set_self_type(self, self_type): - super(Ptr, self).set_self_type(self_type) - def _fix_dst_type(self): if self._dst_type == MemSelf: - if self.get_self_type() is not None: - self._dst_type = self.get_self_type() + if self._get_self_type() is not None: + self._dst_type = self._get_self_type() else: raise ValueError("Unsupported usecase for MemSelf, sorry") @property def dst_type(self): + """Returns the type (MemStruct subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type def deref_get(self, vm, addr): + """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. + Equivalent to a pointer dereference rvalue in C. + """ return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): + """Serializes the @val MemStruct subclass instance in @vm (VmMngr) at + @addr. Equivalent to a pointer dereference assignment in C. + """ # Sanity check if self.dst_type != val.__class__: log.warning("Original type was %s, overriden by value of type %s", @@ -184,6 +331,25 @@ class Ptr(Num): class Inline(MemField): + """Field used to inline a MemStruct in another MemStruct. Equivalent to + having a struct field in a C struct. 
+ + Concretely: + + class MyStructClass(MemStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] + + class Example(MemStruct): + fields = [("mystruct", Inline(MyStructClass))] + + ex = Example(vm, addr) + ex.mystruct.f2 = 3 # inlined structure field access + ex.mystruct = MyStructClass(vm, addr2) # struct copy + + It can be seen like a bridge to use a MemStruct as a MemField + + TODO: make the Inline implicit when setting a field to be a MemStruct + """ def __init__(self, inlined_type, *type_args, **type_kwargs): if not issubclass(inlined_type, MemStruct): @@ -207,21 +373,38 @@ class Inline(MemField): class Array(MemField): + """A fixed size array (contiguous sequence) of a MemField subclass + elements. Similar to something like the char[10] type in C. + + Getting an array field actually returns a MemSizedArray. Setting it is + possible with either a list or a MemSizedArray instance. Examples of syntax: + + class Example(MemStruct): + fields = [("array", Array(Num("B"), 4))] + + mystruct = Example(vm, addr) + mystruct.array[3] = 27 + mystruct.array = [1, 4, 8, 9] + mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4) + """ def __init__(self, field_type, length): self._field_type = field_type self._array_len = length - def set_self_type(self, self_type): - super(Array, self).set_self_type(self_type) - self._field_type.set_self_type(self_type) + def _set_self_type(self, self_type): + super(Array, self)._set_self_type(self_type) + self._field_type._set_self_type(self_type) def set(self, vm, addr, val): + # MemSizedArray assignment if isinstance(val, MemSizedArray): if val.array_len != self._array_len or len(val) != self.size(): raise ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) + + # list assignment elif isinstance(val, list): if len(val) != self._array_len: raise ValueError("Size mismatch in MemSizedArray assignment ") @@ -229,6 +412,7 @@ class Array(MemField): for elt in val: self._field_type.set(vm, addr + 
offset, elt) offset += self._field_type.size() + else: raise NotImplementedError( "Assignment only implemented for list and MemSizedArray") @@ -244,8 +428,27 @@ class Array(MemField): class Union(MemField): + """Allows to put multiple fields at the same offset in a MemStruct, similar + to unions in C. The Union will have the size of the largest of its fields. + + Example: + + class Example(MemStruct): + fields = [("uni", Union([ + ("f1", Num("<B")), + ("f2", Num("<H")) + ]) + )] + + ex = Example(vm, addr) + ex.f2 = 0x1234 + assert ex.f1 == 0x34 + assert ex.uni == '\x34\x12' + assert ex.get_addr("f1") == ex.get_addr("f2") + """ + def __init__(self, field_list): - """[(name, field)] list""" + """field_list is a [(name, field)] list, see the class doc""" self.field_list = field_list def size(self): @@ -267,6 +470,15 @@ class Union(MemField): class Bits(MemField): + """Helper class for BitField, not very useful on its own. Represents some + bits of a Num. + + The @backing_num is used to know how to serialize/deserialize data in vm, + but getting/setting this fields only affects bits from @bit_offset to + @bit_offset + @bits. Masking and shifting is handled by the class, the aim + is to provide a transparent way to set and get some bits of a num. + """ + def __init__(self, backing_num, bits, bit_offset): if not isinstance(backing_num, Num): raise ValueError("backing_num should be a Num instance") @@ -297,10 +509,14 @@ class Bits(MemField): @property def bit_size(self): + """Number of bits read/written by this class""" return self._bits @property def bit_offset(self): + """Offset in bits (beginning at 0, the LSB) from which to read/write + bits. + """ return self._bit_offset def __repr__(self): @@ -308,8 +524,36 @@ class Bits(MemField): self._bit_offset, self._bit_offset + self._bits) class BitField(Union): + """A C-like bitfield. + + Constructed with a list [(<field_name>, <number_of_bits>)] and a + @backing_num. 
The @backing_num is a Num instance that determines the total + size of the bitfield and the way the bits are serialized/deserialized (big + endian int, little endian short...). Can be seen (and implemented) as a + Union of Bits fields. + + Creates fields that allow to access the bitfield fields easily. Example: + + class Example(MemStruct): + fields = [("bf", BitField(Num("B"), [ + ("f1", 2), + ("f2", 4), + ("f3", 1) + ]) + )] + + ex = Example(vm, addr) + ex.memset() + ex.f2 = 2 + ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated + assert ex.f1 == 3 + assert ex.f2 == 2 + assert ex.f3 == 0 # previously memset() + assert ex.bf == 3 + (2 << 2) + """ + def __init__(self, backing_num, bit_list): - """bit_list: [(name, n_bits)]""" + """@backing_num: Num instance, @bit_list: [(name, n_bits)]""" self._num = backing_num fields = [] offset = 0 @@ -465,7 +709,7 @@ class MemStruct(object): offset = 0 for name, field in cls.fields: # For reflexion - field.set_self_type(cls) + field._set_self_type(cls) cls.gen_attr(name, field, offset) offset += field.size() cls._size = offset diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 9eba8fca..df1df9bc 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -20,10 +20,10 @@ class OtherStruct(MemStruct): class MyStruct(MemStruct): fields = [ - # Integer field: just struct.pack fields with one value + # Number field: just struct.pack fields with one value ("num", Num("I")), ("flags", Num("B")), - # Ptr fields are Int, but they can also be dereferenced + # Ptr fields are Num, but they can also be dereferenced # (self.deref_<field>). Deref can be read and set. ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String |