diff options
| author | Florent Monjalet <florent.monjalet@gmail.com> | 2015-11-07 20:06:21 +0100 |
|---|---|---|
| committer | Florent Monjalet <florent.monjalet@gmail.com> | 2016-01-18 14:02:31 +0100 |
| commit | 6e635113b53f932573687f9a6e3fc227cde6c0d9 (patch) | |
| tree | 3312763f19ae5cc68a69762559486512feb90d13 | |
| parent | 6c879d5e03fdfa973b0695cf8259231f09895bf9 (diff) | |
| download | miasm-6e635113b53f932573687f9a6e3fc227cde6c0d9.tar.gz miasm-6e635113b53f932573687f9a6e3fc227cde6c0d9.zip | |
Introducing MemStruct feature in miasm2.analysis.mem
Diffstat (limited to '')
| -rw-r--r-- | miasm2/analysis/mem.py | 696 | ||||
| -rw-r--r-- | test/analysis/mem.py | 440 | ||||
| -rw-r--r-- | test/test_all.py | 2 |
3 files changed, 1138 insertions, 0 deletions
diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py new file mode 100644 index 00000000..a967e58f --- /dev/null +++ b/miasm2/analysis/mem.py @@ -0,0 +1,696 @@ +import logging +import struct + +log = logging.getLogger(__name__) +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# TODO: alloc + +# Helpers + +def indent(s, size=4): + return ' '*size + ('\n' + ' '*size).join(s.split('\n')) + + +# FIXME: copied from miasm2.os_dep.common and fixed +def get_str_ansi(vm, addr, max_char=None): + l = 0 + tmp = addr + while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 1) != "\x00"): + tmp += 1 + l += 1 + return vm.get_mem(addr, l) + + +# TODO: get_raw_str_utf16 for length calculus +def get_str_utf16(vm, addr, max_char=None): + l = 0 + tmp = addr + # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? + while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 2) != "\x00\x00"): + tmp += 2 + l += 2 + s = vm.get_mem(addr, l) + return s.decode('utf-16le') + + +def set_str_ansi(vm, addr, s): + vm.set_mem(addr, s + "\x00") + + +def set_str_utf16(vm, addr, s): + s = (s + '\x00').encode('utf-16le') + vm.set_mem(addr, s) + + +# MemField to MemStruct helper + +def mem(field): + """Generates a MemStruct subclass from a field. The field's value can + be accessed through self.value or self.deref_value if field is a Ptr. 
+ """ + fields = [("value", field)] + # Build a type to contain the field type + mem_type = type("Mem%r" % field, (MemStruct,), {'fields': fields}) + return mem_type + + +# MemField classes + +class MemField(object): + """Base class to provide methods to set and get fields from virtual mem.""" + + _self_type = None + + def _pack(self, val): + """Returns a packed str""" + raise NotImplementedError() + + def _unpack(self, packed_str): + """Returns an object.""" + raise NotImplementedError() + + def set(self, vm, addr, val): + raw = self._pack(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + raw = vm.get_mem(addr, self.size()) + return self._unpack(raw) + + def get_self_type(self): + return self._self_type + + def set_self_type(self, self_type): + self._self_type = self_type + + def size(self): + raise NotImplementedError() + + def __len__(self): + return self.size() + + +class Struct(MemField): + + def __init__(self, fmt): + self._fmt = fmt + + def _pack(self, fields): + return struct.pack(self._fmt, *fields) + + def _unpack(self, packed_str): + return struct.unpack(self._fmt, packed_str) + + def size(self): + return struct.calcsize(self._fmt) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self._fmt) + + +class Num(Struct): + + def _pack(self, number): + return super(Num, self)._pack([number]) + + def _unpack(self, packed_str): + upck = super(Num, self)._unpack(packed_str) + if len(upck) > 1: + raise ValueError("Num format string unpacks to multiple values, " + "should be 1") + return upck[0] + + +class Ptr(Num): + + def __init__(self, fmt, dst_type, *type_args, **type_kwargs): + if not isinstance(dst_type, MemField) and\ + not (isinstance(dst_type, type) and\ + issubclass(dst_type, MemStruct)) and\ + not dst_type == MemSelf: + raise ValueError("dst_type of Ptr must be a MemStruct type, a " + "MemField instance, the MemSelf marker or a class " + "name.") + super(Ptr, self).__init__(fmt) + if isinstance(dst_type, MemField): + # Patch 
the field to propagate the MemSelf replacement + dst_type.get_self_type = lambda: self.get_self_type() + dst_type = mem(dst_type) + self._dst_type = dst_type + self._type_args = type_args + self._type_kwargs = type_kwargs + + def set_self_type(self, self_type): + super(Ptr, self).set_self_type(self_type) + + def _fix_dst_type(self): + global classes + + if self._dst_type == MemSelf: + if self.get_self_type() is not None: + self._dst_type = self.get_self_type() + else: + raise ValueError("Unsupported usecase for MemSelf, sorry") + + def deref_get(self, vm, addr): + self._fix_dst_type() + return self._dst_type(vm, addr, *self._type_args, **self._type_kwargs) + + def deref_set(self, vm, addr, val): + self._fix_dst_type() + # Sanity check + if self._dst_type != val.__class__: + log.warning("Original type was %s, overriden by value of type %s", + self._dst_type.__name__, val.__class__.__name__) + + # Actual job + vm.set_mem(addr, str(val)) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._dst_type) + + +class Inline(MemField): + + def __init__(self, inlined_type, *type_args, **type_kwargs): + if not issubclass(inlined_type, MemStruct): + raise ValueError("inlined type if Inline must be a MemStruct") + self._il_type = inlined_type + self._type_args = type_args + self._type_kwargs = type_kwargs + + def set(self, vm, addr, val): + raw = str(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + # Forward the constructor args stored in __init__, as Ptr.deref_get does + return self._il_type(vm, addr, *self._type_args, **self._type_kwargs) + + def size(self): + return self._il_type.sizeof() + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._il_type) + + +class Array(MemField): + + def __init__(self, field_type, length): + self._field_type = field_type + self._array_len = length + + def set_self_type(self, self_type): + super(Array, self).set_self_type(self_type) + self._field_type.set_self_type(self_type) + + def set(self, vm, addr, val): + if isinstance(val, MemSizedArray): + if val.array_len != self._array_len or len(val) != 
self.size(): + raise ValueError("Size mismatch in MemSizedArray assignment") + raw = str(val) + vm.set_mem(addr, raw) + elif isinstance(val, list): + if len(val) != self._array_len: + raise ValueError("Size mismatch in MemSizedArray assignment ") + offset = 0 + for elt in val: + self._field_type.set(vm, addr + offset, elt) + offset += self._field_type.size() + else: + raise NotImplementedError( + "Assignment only implemented for list and MemSizedArray") + + def get(self, vm, addr): + return MemSizedArray(vm, addr, self._field_type, self._array_len) + + def size(self): + return self._field_type.size() * self._array_len + + def __repr__(self): + return "%r[%s]" % (self._field_type, self._array_len) + + +class Union(MemField): + def __init__(self, field_list): + """[(name, field)] list""" + self.field_list = field_list + + def size(self): + return max(field.size() for _, field in self.field_list) + + def set(self, vm, addr, val): + # Check the length of the value itself, not of the builtin str type + if not isinstance(val, str) or not len(val) == self.size(): + raise ValueError("Union can only be set with raw str of the Union's" + " size") + vm.set_mem(addr, val) # set_mem takes (addr, data); vm is the receiver + + def get(self, vm, addr): + return vm.get_mem(addr, self.size()) + + def __repr__(self): + fields_repr = ', '.join("%s: %r" % (name, field) + for name, field in self.field_list) + return "%s(%s)" % (self.__class__.__name__, fields_repr) + + +class Bits(MemField): + def __init__(self, backing_num, bits, bit_offset): + if not isinstance(backing_num, Num): + raise ValueError("backing_num should be a Num instance") + self._num = backing_num + self._bits = bits + self._bit_offset = bit_offset + + def set(self, vm, addr, val): + val_mask = (1 << self._bits) - 1 + val_shifted = (val & val_mask) << self._bit_offset + num_size = self._num.size() * 8 + + full_num_mask = (1 << num_size) - 1 + num_mask = (~(val_mask << self._bit_offset)) & full_num_mask + + num_val = self._num.get(vm, addr) + res_val = (num_val & num_mask) | val_shifted + self._num.set(vm, addr, res_val) + + def 
get(self, vm, addr): + val_mask = (1 << self._bits) - 1 + num_val = self._num.get(vm, addr) + res_val = (num_val >> self._bit_offset) & val_mask + return res_val + + def size(self): + return self._num.size() + + @property + def bit_size(self): + return self._bits + + @property + def bit_offset(self): + return self._bit_offset + + def __repr__(self): + return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, + self._bit_offset, self._bit_offset + self._bits) + +class BitField(Union): + def __init__(self, backing_num, bit_list): + """bit_list: [(name, n_bits)]""" + self._num = backing_num + fields = [] + offset = 0 + for name, bits in bit_list: + fields.append((name, Bits(self._num, bits, offset))) + offset += bits + if offset > self._num.size() * 8: + raise ValueError("sum of bit lengths is > to the backing num size") + super(BitField, self).__init__(fields) + + def set(self, vm, addr, val): + self._num.set(vm, addr, val) + + def get(self, vm, addr): + return self._num.get(vm, addr) + + +# MemStruct classes + +class _MetaMemStruct(type): + + def __init__(cls, name, bases, dct): + super(_MetaMemStruct, cls).__init__(name, bases, dct) + cls.gen_fields() + + def gen_fields(cls, fields=None): + if fields is None: + fields = cls.fields + cls._attrs = {} + offset = 0 + for name, field in fields: # use the argument; it defaults to cls.fields above + # For reflexion + field.set_self_type(cls) + cls.gen_attr(name, field, offset) + offset += field.size() + cls._size = offset + + def gen_attr(cls, name, field, offset): + # FIXME: move to gen_simple_arg? 
+ cls._attrs[name] = {"field": field, "offset": offset} + cls._gen_simple_attr(name, field, offset) + if isinstance(field, Union): + cls._gen_union_attr(field, offset) + + def _gen_simple_attr(cls, name, field, offset): + # Generate self.<name> getter and setter + setattr(cls, name, property( + # default parameter allow to bind the value of name for a given + # loop iteration + lambda self, name=name: self.get_attr(name), + lambda self, val, name=name: self.set_attr(name, val) + )) + + # Generate self.deref_<name> getter and setter if this field is a + # Ptr + if isinstance(field, Ptr): + setattr(cls, "deref_%s" % name, property( + lambda self, name=name: self.deref_attr(name), + lambda self, val, name=name: self.set_deref_attr(name, val) + )) + + def _gen_union_attr(cls, union_field, offset): + if not isinstance(union_field, Union): + raise ValueError("field should be an Union instance") + for name, field in union_field.field_list: + cls.gen_attr(name, field, offset) + + def __repr__(cls): + return cls.__name__ + + +class MemStruct(object): + __metaclass__ = _MetaMemStruct + + fields = [] + + _size = None + + def __init__(self, vm, addr, *args, **kwargs): + super(MemStruct, self).__init__(*args, **kwargs) + self._vm = vm + self._addr = addr + + def get_addr(self, field_name=None): + if field_name is not None: + offset = self._attrs[field_name]['offset'] + else: + offset = 0 + return self._addr + offset + + @classmethod + def sizeof(cls): + if cls._size is None: + return sum(a["field"].size() for a in cls._attrs.itervalues()) + return cls._size + + def get_size(self): + return self.sizeof() + + def get_attr(self, attr): + if attr not in self._attrs: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, attr)) + field = self._attrs[attr]["field"] + offset = self._attrs[attr]["offset"] + return field.get(self._vm, self.get_addr() + offset) + + def set_attr(self, attr, val): + if attr not in self._attrs: + raise AttributeError("'%s' 
object has no attribute '%s'" + % (self.__class__.__name__, attr)) + field = self._attrs[attr]["field"] + offset = self._attrs[attr]["offset"] + field.set(self._vm, self.get_addr() + offset, val) + + def deref_attr(self, attr): + addr = self.get_attr(attr) + field = self._attrs[attr]["field"] + assert isinstance(field, Ptr),\ + "Programming error: field should be a Ptr" + return field.deref_get(self._vm, addr) + + def set_deref_attr(self, attr, val): + addr = self.get_attr(attr) + field = self._attrs[attr]["field"] + assert isinstance(field, Ptr),\ + "Programming error: field should be a Ptr" + field.deref_set(self._vm, addr, val) + + def memset(self, byte='\x00'): + """memset(0)""" + if not isinstance(byte, str) or not len(byte) == 1: + raise ValueError("byte must be a 1-lengthed str") + self._vm.set_mem(self.get_addr(), byte * self.get_size()) + + # TODO: examples + def cast(self, other_type, *type_args, **type_kwargs): + return self.cast_field(None, other_type, *type_args, **type_kwargs) + + def cast_field(self, field_name, other_type, *type_args, **type_kwargs): + return other_type(self._vm, self.get_addr(field_name), + *type_args, **type_kwargs) + + def __len__(self): + return self.get_size() + + def __str__(self): + attrs = sorted(self._attrs.itervalues(), key=lambda a: a["offset"]) + out = [] + for attr in attrs: + field = attr["field"] + offset = attr["offset"] + out.append(self._vm.get_mem(self.get_addr() + offset, field.size())) + return ''.join(out) + + def __repr__(self): + attrs = sorted(self._attrs.iteritems(), key=lambda a: a[1]["offset"]) + out = [] + for name, attr in attrs: + field = attr["field"] + val_repr = repr(self.get_attr(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + + def __eq__(self, other): + # Do not test class equality, because of dynamically generated fields + # self.__class__ == other.__class__ 
and + # Could test attrs? + # TODO: self._attrs == other._attrs and + return str(self) == str(other) + + def __ne__(self, other): + return not (self == other) + + +class MemSelf(MemStruct): + """Special Marker class for reference to current class in a Ptr.""" + pass + + +class MemVoid(MemStruct): + def __repr__(self): + return self.__class__.__name__ + + +# This does not use _MetaMemStruct features, impl is custom for strings, +# because they are unsized. The only memory field is self.value. +class MemStr(MemStruct): + def __init__(self, vm, addr, encoding="ansi"): + # TODO: encoding as lambda + if encoding not in ["ansi", "utf16"]: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + super(MemStr, self).__init__(vm, addr) + self._enc = encoding + + @property + def value(self): + if self._enc == "ansi": + get_str = get_str_ansi + elif self._enc == "utf16": + get_str = get_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + return get_str(self._vm, self.get_addr()) + + @value.setter + def value(self, s): + if self._enc == "ansi": + set_str = set_str_ansi + elif self._enc == "utf16": + set_str = set_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + set_str(self._vm, self.get_addr(), s) + + def get_size(self): + """FIXME Quite unsafe: it reads the string underneath to determine the + size + """ + val = self.value + if self._enc == "ansi": + return len(val) + 1 + elif self._enc == "utf16": + # FIXME: real encoding... 
+ return len(val) * 2 + 2 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + + def __str__(self): + raw = self._vm.get_mem(self.get_addr(), self.get_size()) + return raw + + def __repr__(self): + return "%r(%s): %r" % (self.__class__, self._enc, self.value) + + +class MemArray(MemStruct): + _field_type = None + + def __init__(self, vm, addr, field_type=None): + super(MemArray, self).__init__(vm, addr) + if self._field_type is None and field_type is not None: + self._field_type = field_type + if self._field_type is None: + raise NotImplementedError( + "Provide field_type to instanciate this class, " + "or generate a subclass with mem_array_type.") + + @property + def field_type(self): + return self._field_type + + def _normalize_idx(self, idx): + # Noop for this type + return idx + + def _normalize_slice(self, slice_): + start = slice_.start if slice_.start is not None else 0 + stop = slice_.stop if slice_.stop is not None else self.get_size() + step = slice_.step if slice_.step is not None else 1 + return slice(start, stop, step) + + def _check_bounds(self, idx): + idx = self._normalize_idx(idx) + if not isinstance(idx, int) and not isinstance(idx, long): + raise ValueError("index must be an int or a long") + if idx < 0: + raise IndexError("Index %s out of bounds" % idx) + + def index2addr(self, idx): + self._check_bounds(idx) + addr = self.get_addr() + idx * self._field_type.size() + return addr + + def __getitem__(self, idx): + if isinstance(idx, slice): + res = [] + idx = self._normalize_slice(idx) + for i in xrange(idx.start, idx.stop, idx.step): + res.append(self._field_type.get(self._vm, self.index2addr(i))) + return res + else: + return self._field_type.get(self._vm, self.index2addr(idx)) + + def deref_get(self, idx): + return self._field_type.deref_get(self._vm, self[idx]) + + def __setitem__(self, idx, item): + if isinstance(idx, slice): + idx = self._normalize_slice(idx) + if len(item) != len(xrange(idx.start, idx.stop, 
idx.step)): + raise ValueError("Mismatched lengths in slice assignment") + for i, val in zip(xrange(idx.start, idx.stop, idx.step), item): + self._field_type.set(self._vm, self.index2addr(i), val) + else: + self._field_type.set(self._vm, self.index2addr(idx), item) + + def deref_set(self, idx, item): + self._field_type.deref_set(self._vm, self[idx], item) + + # just a shorthand + def as_mem_str(self, encoding="ansi"): + return self.cast(MemStr, encoding) + + @classmethod + def sizeof(cls): + raise ValueError("%s is unsized, which makes some operations" + " impossible. Use MemSizedArray instead.") + + def __str__(self): + raise ValueError("%s is unsized, which makes some operations" + " impossible. Use MemSizedArray instead.") + + def __repr__(self): + return "[%r, ...] [%r]" % (self[0], self._field_type) + + +def mem_array_type(field_type): + array_type = type('MemArray_%r' % (field_type,), + (MemArray,), + {'_field_type': field_type}) + return array_type + + +class MemSizedArray(MemArray): + _array_len = None + + def __init__(self, vm, addr, field_type=None, length=None): + super(MemSizedArray, self).__init__(vm, addr, field_type) + if self._array_len is None and length is not None: + self._array_len = length + if self._array_len is None or self._field_type is None: + raise NotImplementedError( + "Provide field_type and length to instanciate this class, " + "or generate a subclass with mem_sized_array_type.") + + @property + def array_len(self): + return self._array_len + + def sizeof(cls): + raise ValueError("MemSizedArray is not statically sized. 
Use " + "mem_sized_array_type to generate a type that is.") + + def get_size(self): + return self._array_len * self._field_type.size() + + def _normalize_idx(self, idx): + if idx < 0: + return self.get_size() - idx + return idx + + def _check_bounds(self, idx): + if not isinstance(idx, int) and not isinstance(idx, long): + raise ValueError("index must be an int or a long") + if idx < 0 or idx >= self.get_size(): + raise IndexError("Index %s out of bounds" % idx) + + def __iter__(self): + for i in xrange(self._array_len): + yield self[i] + + def __str__(self): + return self._vm.get_mem(self.get_addr(), self.get_size()) + + def __repr__(self): + item_reprs = [repr(item) for item in self] + if self.array_len > 0 and '\n' in item_reprs[0]: + items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' + else: + items = ', '.join(item_reprs) + return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) + + +def mem_sized_array_type(field_type, length): + @classmethod + def sizeof(cls): + return cls._field_type.size() * cls._array_len + + array_type = type('MemSizedArray_%r_%s' % (field_type, length), + (MemSizedArray,), + {'_array_len': length, + '_field_type': field_type, + 'sizeof': sizeof}) + return array_type + + +# IDEA: func_args_* functions could return a dynamically generated MemStruct +# class instance diff --git a/test/analysis/mem.py b/test/analysis/mem.py new file mode 100644 index 00000000..4b306e67 --- /dev/null +++ b/test/analysis/mem.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python + +# miasm2.analysis.mem tests + + +from miasm2.analysis.machine import Machine +from miasm2.analysis.mem import * +from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE + +# Two structures with some fields +class OtherStruct(MemStruct): + fields = [ + ("foo", Num("H")), + ] + +class MyStruct(MemStruct): + fields = [ + # Integer field: just struct.pack fields with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields are Int, but they can also be dereferenced + # 
(self.deref_<field>). Deref can be read and set. + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", MemStr)), + ("i", Ptr("I", Num("I"))), + ] + +jitter = Machine("x86_32").jitter("python") +jitter.init_stack() +addr = 0x1000 +addr2 = 0x1100 +addr3 = 0x1200 +addr_str = 0x1300 +addr_str2 = 0x1400 +addr_str3 = 0x1500 +addr4 = 0x1600 +addr5 = 0x1700 +addr6 = 0x1800 +addr7 = 0x1900 +addr8 = 0x2000 +addr9 = 0x2100 +addr10 = 0x2200 +addr11 = 0x2300 +size = 0x2000 +# Initialize all mem with 0xaa +jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) + + +# MemStruct tests +## Creation +mstruct = MyStruct(jitter.vm, addr) +## Fields are read from the virtual memory +assert mstruct.num == 0xaaaaaaaa +assert mstruct.flags == 0xaa + +## Field assignment modifies virtual memory +mstruct.num = 3 +assert mstruct.num == 3 +memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] +assert memval == 3 + +## Memset sets the whole structure +mstruct.memset() +assert mstruct.num == 0 +assert mstruct.flags == 0 +assert mstruct.other == 0 +assert mstruct.s == 0 +assert mstruct.i == 0 +mstruct.memset('\x11') +assert mstruct.num == 0x11111111 +assert mstruct.flags == 0x11 +assert mstruct.other == 0x11111111 +assert mstruct.s == 0x11111111 +assert mstruct.i == 0x11111111 + + +# Ptr tests +## Setup for Ptr tests +other = OtherStruct(jitter.vm, addr2) +other.foo = 0x1234 +assert other.foo == 0x1234 + +## Basic usage +mstruct.other = other.get_addr() +assert mstruct.other == addr2 +assert mstruct.deref_other == other +assert mstruct.deref_other.foo == 0x1234 + +## Deref assignment +other2 = OtherStruct(jitter.vm, addr3) +other2.foo = 0xbeef +assert mstruct.deref_other != other2 +mstruct.deref_other = other2 +assert mstruct.deref_other == other2 +assert mstruct.deref_other.foo == 0xbeef +assert mstruct.other == addr2 # Addr did not change +assert other.foo == 0xbeef # Deref assignment copies by value +assert other2.foo == 
0xbeef +assert other.get_addr() != other2.get_addr() # Not the same address +assert other == other2 # But same value + +## Same stuff for Ptr to MemField +mstruct.i = addr7 +mstruct.deref_i.value = 8 +assert mstruct.deref_i.value == 8 +assert mstruct.i == addr7 +memval = struct.unpack("I", jitter.vm.get_mem(addr7, 4))[0] +assert memval == 8 + + +# Str tests +## Basic tests +memstr = MemStr(jitter.vm, addr_str) +memstr.value = "" +assert memstr.value == "" +assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' +memstr.value = "lala" +assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' +jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') +assert memstr.value == 'MIAMs' + +## Ptr(MemStr) manipulations +mstruct.s = memstr.get_addr() +assert mstruct.s == addr_str +assert mstruct.deref_s == memstr +assert mstruct.deref_s.value == 'MIAMs' +mstruct.deref_s.value = "That's all folks!" +assert mstruct.deref_s.value == "That's all folks!" +assert memstr.value == "That's all folks!" + +## Other address, same value, same encoding +memstr2 = MemStr(jitter.vm, addr_str2) +memstr2.value = "That's all folks!" +assert memstr2.get_addr() != memstr.get_addr() +assert memstr2 == memstr + +## Same value, other encoding +memstr3 = MemStr(jitter.vm, addr_str3, "utf16") +memstr3.value = "That's all folks!" 
+assert memstr3.get_addr() != memstr.get_addr() +assert memstr3.get_size() != memstr.get_size() # Size is different +assert str(memstr3) != str(memstr) # Mem representation is different +assert memstr3 != memstr # Encoding is different, so they are not eq +assert memstr3.value == memstr.value # But the python value is the same + + +# MemArray tests +memarray = MemArray(jitter.vm, addr6, Num("I")) +# This also works: +_memarray = mem_array_type(Num("I"))(jitter.vm, addr6) +memarray[0] = 0x02 +assert memarray[0] == 0x02 +assert jitter.vm.get_mem(memarray.get_addr(), + Num("I").size()) == '\x02\x00\x00\x00' +memarray[2] = 0xbbbbbbbb +assert memarray[2] == 0xbbbbbbbb +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), + Num("I").size()) == '\xbb\xbb\xbb\xbb' +try: + s = str(memarray) + assert False, "Should raise" +except (NotImplementedError, ValueError): + pass +try: + s = len(memarray) + assert False, "Should raise" +except (NotImplementedError, ValueError): + pass + +## Slice assignment +memarray[2:4] = [3, 3] +assert memarray[2] == 3 +assert memarray[3] == 3 +assert memarray[2:4] == [3, 3] +try: + memarray[2:4] = [3, 3, 3] + assert False, "Should raise, mismatched sizes" +except (ValueError): + pass + +try: + memarray[1, 2] + assert False, "Should raise, mismatched sizes" +except (ValueError): + pass + + +# MemSizedArray tests +memsarray = MemSizedArray(jitter.vm, addr6, Num("I"), 10) +# This also works: +_memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm, addr6) +# And mem_sized_array_type generates statically sized types +assert _memsarray.sizeof() == len(memsarray) +memsarray.memset('\xcc') +assert memsarray[0] == 0xcccccccc +assert len(memsarray) == 10 * 4 +assert str(memsarray) == '\xcc' * (4 * 10) +for val in memsarray: + assert val == 0xcccccccc +assert list(memsarray) == [0xcccccccc] * 10 +memsarray[0] = 2 +assert memsarray[0] == 2 +assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) + + +# Atypical fields (Struct and 
Array) +class MyStruct2(MemStruct): + fields = [ + ("s1", Struct("=BI")), + ("s2", Array(Num("B"), 10)), + ] + +ms2 = MyStruct2(jitter.vm, addr5) +ms2.memset('\xaa') +assert len(ms2) == 15 + +## Struct +assert len(ms2.s1) == 2 +assert ms2.s1[0] == 0xaa +assert ms2.s1[1] == 0xaaaaaaaa + +## Array +### Basic checks +assert len(ms2.s2) == 10 +for val in ms2.s2: + assert val == 0xaa +assert ms2.s2[0] == 0xaa +assert ms2.s2[9] == 0xaa + +### Subscript assignment +ms2.s2[3] = 2 +assert ms2.s2[3] == 2 + +### Field assignment (list) +ms2.s2 = [1] * 10 +for val in ms2.s2: + assert val == 1 + +### Field assignment (MemSizedArray) +jitter.vm.set_mem(addr4, '\x02'*10) +array2 = MemSizedArray(jitter.vm, addr4, Num("B"), 10) +for val in array2: + assert val == 2 +ms2.s2 = array2 +for val in ms2.s2: + assert val == 2 + + +# Inline tests +class InStruct(MemStruct): + fields = [ + ("foo", Num("B")), + ("bar", Num("B")), + ] + +class ContStruct(MemStruct): + fields = [ + ("one", Num("B")), + ("instruct", Inline(InStruct)), + ("last", Num("B")), + ] + +cont = ContStruct(jitter.vm, addr4) +cont.memset() +assert len(cont) == 4 +assert len(cont.instruct) == 2 +assert cont.one == 0 +assert cont.last == 0 +assert cont.instruct.foo == 0 +assert cont.instruct.bar == 0 +cont.memset('\x11') +assert cont.one == 0x11 +assert cont.last == 0x11 +assert cont.instruct.foo == 0x11 +assert cont.instruct.bar == 0x11 + +cont.one = 0x01 +cont.instruct.foo = 0x02 +cont.instruct.bar = 0x03 +cont.last = 0x04 +assert cont.one == 0x01 +assert cont.instruct.foo == 0x02 +assert cont.instruct.bar == 0x03 +assert cont.last == 0x04 +assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' + +# Quick mem(MemField) test: +assert mem(Num("f"))(jitter.vm, addr) == mem(Num("f"))(jitter.vm, addr) + + +# Union test +class UniStruct(MemStruct): + fields = [ + ("one", Num("B")), + ("union", Union([ + ("instruct", Inline(InStruct)), + ("i", Num(">I")), + ])), + ("last", Num("B")), + ] + +uni = 
UniStruct(jitter.vm, addr8) +jitter.vm.set_mem(addr8, ''.join(chr(x) for x in xrange(len(uni)))) +assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 +assert uni.one == 0x00 +assert uni.instruct.foo == 0x01 +assert uni.instruct.bar == 0x02 +assert uni.i == 0x01020304 +assert uni.last == 0x05 +uni.instruct.foo = 0x02 +assert uni.i == 0x02020304 +uni.i = 0x11223344 +assert uni.instruct.foo == 0x11 +assert uni.instruct.bar == 0x22 + + +# BitField test +class BitStruct(MemStruct): + fields = [ + ("flags", BitField(Num("H"), [ + ("f1_1", 1), + ("f2_5", 5), + ("f3_8", 8), + ("f4_1", 1), + ])), + ] + +bit = BitStruct(jitter.vm, addr9) +bit.memset() +assert bit.flags == 0 +assert bit.f1_1 == 0 +assert bit.f2_5 == 0 +assert bit.f3_8 == 0 +assert bit.f4_1 == 0 +bit.f1_1 = 1 +bit.f2_5 = 0b10101 +bit.f3_8 = 0b10000001 +assert bit.flags == 0b0010000001101011 +assert bit.f1_1 == 1 +assert bit.f2_5 == 0b10101 +assert bit.f3_8 == 0b10000001 +assert bit.f4_1 == 0 +bit.flags = 0b1101010101011100 +assert bit.f1_1 == 0 +assert bit.f2_5 == 0b01110 +assert bit.f3_8 == 0b01010101 +assert bit.f4_1 == 1 + + +# Unhealthy ideas +class UnhealthyIdeas(MemStruct): + fields = [ + ("f1", Ptr("I", MemArray, Struct("=Bf"))), + ("f2", Array(Ptr("I", MemStr), 10)), + ("f3", Ptr("I", MemSelf)), + ("f4", Array(Ptr("I", MemSelf), 2)), + ("f5", Ptr("I", Ptr("I", MemSelf))), + ] + +# Other way to handle self dependency and circular dependencies +# NOTE: in this case, MemSelf would have been fine +UnhealthyIdeas.fields.append( + ("f6", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) +# Regen all fields +UnhealthyIdeas.gen_fields() + +ideas = UnhealthyIdeas(jitter.vm, addr7) +ideas.memset() +ideas.f3 = ideas.get_addr() +assert ideas == ideas.deref_f3 + +ideas.f4[0] = ideas.get_addr() +assert ideas.f4.deref_get(0) == ideas +ideas.f4[1] = addr6 +ideas.f4.deref_set(1, ideas) +assert ideas.f4[1] != ideas.get_addr() +assert ideas.f4.deref_get(1) == ideas + +ideas.f5 = addr2 +ideas.deref_f5.value = 
ideas.get_addr() +assert ideas.deref_f5.value == ideas.get_addr() +assert ideas.deref_f5.deref_value == ideas + +ideas.deref_f5.value = addr3 +ideas.deref_f5.deref_value = ideas +assert ideas.deref_f5.value != ideas.get_addr() +assert ideas.deref_f5.deref_value == ideas + +ideas.f6 = addr4 +ideas.deref_f6.value = addr5 +ideas.deref_f6.deref_value.value = ideas.get_addr() +assert ideas.deref_f6.deref_value.deref_value == ideas + +# Cast tests +# MemStruct cast +MemInt = mem(Num("I")) +MemShort = mem(Num("H")) +dword = MemInt(jitter.vm, addr10) +dword.value = 0x12345678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).value == 0x5678 + +# Field cast +ms2.s2[0] = 0x34 +ms2.s2[1] = 0x12 +assert ms2.cast_field("s2", MemShort).value == 0x1234 + +# Other method +assert MemShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 + +# Manual cast inside an Array +ms2.s2[4] = 0xcd +ms2.s2[5] = 0xab +assert MemShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd + +# void* style cast +MemPtrVoid = mem(Ptr("I", MemVoid)) +MemPtrMyStruct = mem(Ptr("I", MyStruct)) +p = MemPtrVoid(jitter.vm, addr11) +p.value = mstruct.get_addr() +assert p.deref_value.cast(MyStruct) == mstruct +assert p.cast(MemPtrMyStruct).deref_value == mstruct + +print "Some struct reprs:\n" +print repr(mstruct), '\n' +print repr(ms2), '\n' +print repr(cont), '\n' +print repr(uni), '\n' +print repr(bit), '\n' +print repr(bit), '\n' +print repr(ideas), '\n' +print repr(mem(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' +print repr(mem(Num("f"))(jitter.vm, addr)), '\n' +print repr(memarray) +print repr(memsarray) +print repr(memstr) +print repr(memstr3) + +print "Ok" # That's all folks! 
diff --git a/test/test_all.py b/test/test_all.py index bc019104..71f036a2 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -238,6 +238,8 @@ testset += RegressionTest(["depgraph.py"], base_dir="analysis", (14, 1), (15, 1))) for fname in fnames]) +testset += RegressionTest(["mem.py"], base_dir="analysis") + # Examples class Example(Test): """Examples specificities: |