From ba2df16277d7d4deae118ed11e1e92cd478045ec Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 16:00:26 +0100 Subject: MemStruct/Types: Renamed analysis.mem to core.types --- test/core/types.py | 506 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 506 insertions(+) create mode 100644 test/core/types.py (limited to 'test/core') diff --git a/test/core/types.py b/test/core/types.py new file mode 100644 index 00000000..db72449c --- /dev/null +++ b/test/core/types.py @@ -0,0 +1,506 @@ +#!/usr/bin/env python + +# miasm2.core.types tests + +import struct + +from miasm2.analysis.machine import Machine +from miasm2.core.types import MemStruct, Num, Ptr, Str, \ + Array, RawStruct, Union, \ + BitField, Self, Void, Bits, \ + set_allocator, MemUnion, Struct +from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm2.os_dep.common import heap + +# Two structures with some fields +class OtherStruct(MemStruct): + fields = [ + ("foo", Num("H")), + ] + +class MyStruct(MemStruct): + fields = [ + # Number field: just struct.pack fields with one value + ("num", Num("I")), + ("flags", Num("B")), + # This field is a pointer to another struct, it has a numeric + # value (mystruct.other.val) and can be dereferenced to get an + # OtherStruct instance (mystruct.other.deref) + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + +jitter = Machine("x86_32").jitter("python") +jitter.init_stack() +addr = 0x1000 +size = 0x1000 +addr_str = 0x1100 +addr_str2 = 0x1200 +addr_str3 = 0x1300 +# Initialize all mem with 0xaa +jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) + + +# MemStruct tests +## Creation +# Use manual allocation with explicit addr for the first example +mstruct = MyStruct(jitter.vm, addr) +## Fields are read from the virtual memory +assert mstruct.num == 0xaaaaaaaa +assert mstruct.flags == 0xaa + +## Field assignment modifies virtual 
memory +mstruct.num = 3 +assert mstruct.num == 3 +memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] +assert memval == 3 + +## Memset sets the whole structure +mstruct.memset() +assert mstruct.num == 0 +assert mstruct.flags == 0 +assert mstruct.other.val == 0 +assert mstruct.s.val == 0 +assert mstruct.i.val == 0 +mstruct.memset('\x11') +assert mstruct.num == 0x11111111 +assert mstruct.flags == 0x11 +assert mstruct.other.val == 0x11111111 +assert mstruct.s.val == 0x11111111 +assert mstruct.i.val == 0x11111111 + + +# From now, just use heap.vm_alloc +my_heap = heap() +set_allocator(my_heap.vm_alloc) + + +# Ptr tests +## Setup for Ptr tests +# the addr field can now be omited since allocator is set +other = OtherStruct(jitter.vm) +other.foo = 0x1234 +assert other.foo == 0x1234 + +## Basic usage +mstruct.other.val = other.get_addr() +# This also works for now: +# mstruct.other = other.get_addr() +assert mstruct.other.val == other.get_addr() +assert mstruct.other.deref == other +assert mstruct.other.deref.foo == 0x1234 + +## Deref assignment +other2 = OtherStruct(jitter.vm) +other2.foo = 0xbeef +assert mstruct.other.deref != other2 +mstruct.other.deref = other2 +assert mstruct.other.deref == other2 +assert mstruct.other.deref.foo == 0xbeef +assert mstruct.other.val == other.get_addr() # Addr did not change +assert other.foo == 0xbeef # Deref assignment copies by value +assert other2.foo == 0xbeef +assert other.get_addr() != other2.get_addr() # Not the same address +assert other == other2 # But same value + +## Same stuff for Ptr to MemField +alloc_addr = my_heap.vm_alloc(jitter.vm, + mstruct.get_type().get_field_type("i") + .dst_type.sizeof()) +mstruct.i = alloc_addr +mstruct.i.deref.val = 8 +assert mstruct.i.deref.val == 8 +assert mstruct.i.val == alloc_addr +memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] +assert memval == 8 + + +# Str tests +## Basic tests +memstr = Str().pinned(jitter.vm, addr_str) +memstr.val = "" +assert 
memstr.val == "" +assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' +memstr.val = "lala" +assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' +jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') +assert memstr.val == 'MIAMs' + +## Ptr(Str()) manipulations +mstruct.s.val = memstr.get_addr() +assert mstruct.s.val == addr_str +assert mstruct.s.deref == memstr +assert mstruct.s.deref.val == 'MIAMs' +mstruct.s.deref.val = "That's all folks!" +assert mstruct.s.deref.val == "That's all folks!" +assert memstr.val == "That's all folks!" + +## Other address, same value, same encoding +memstr2 = Str().pinned(jitter.vm, addr_str2) +memstr2.val = "That's all folks!" +assert memstr2.get_addr() != memstr.get_addr() +assert memstr2 == memstr + +## Same value, other encoding +memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) +memstr3.val = "That's all folks!" +assert memstr3.get_addr() != memstr.get_addr() +assert memstr3.get_size() != memstr.get_size() # Size is different +assert str(memstr3) != str(memstr) # Mem representation is different +assert memstr3 != memstr # Encoding is different, so they are not eq +assert memstr3.val == memstr.val # But the python value is the same + + +# Array tests +# Allocate buffer manually, since memarray is unsized +alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) +memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) +memarray[0] = 0x02 +assert memarray[0] == 0x02 +assert jitter.vm.get_mem(memarray.get_addr(), + Num("I").size()) == '\x02\x00\x00\x00' +memarray[2] = 0xbbbbbbbb +assert memarray[2] == 0xbbbbbbbb +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), + Num("I").size()) == '\xbb\xbb\xbb\xbb' +try: + s = str(memarray) + assert False, "Should raise" +except (NotImplementedError, ValueError): + pass +try: + s = len(memarray) + assert False, "Should raise" +except (NotImplementedError, ValueError): + pass + +## Slice assignment +memarray[2:4] = [3, 3] +assert memarray[2] == 3 +assert 
memarray[3] == 3 +assert memarray[2:4] == [3, 3] +try: + memarray[2:4] = [3, 3, 3] + assert False, "Should raise, mismatched sizes" +except ValueError: + pass + + +memsarray = Array(Num("I"), 10).pinned(jitter.vm) +# And Array(type, size).pinned generates statically sized types +assert memsarray.sizeof() == Num("I").size() * 10 +memsarray.memset('\xcc') +assert memsarray[0] == 0xcccccccc +assert len(memsarray) == 10 * 4 +assert str(memsarray) == '\xcc' * (4 * 10) +for val in memsarray: + assert val == 0xcccccccc +assert list(memsarray) == [0xcccccccc] * 10 +memsarray[0] = 2 +assert memsarray[0] == 2 +assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) + + +# Atypical fields (RawStruct and Array) +class MyStruct2(MemStruct): + fields = [ + ("s1", RawStruct("=BI")), + ("s2", Array(Num("B"), 10)), + ] + +ms2 = MyStruct2(jitter.vm) +ms2.memset('\xaa') +assert len(ms2) == 15 + +## RawStruct +assert len(ms2.s1) == 2 +assert ms2.s1[0] == 0xaa +assert ms2.s1[1] == 0xaaaaaaaa + +## Array +### Basic checks +assert len(ms2.s2) == 10 +for val in ms2.s2: + assert val == 0xaa +assert ms2.s2[0] == 0xaa +assert ms2.s2[9] == 0xaa + +### Subscript assignment +ms2.s2[3] = 2 +assert ms2.s2[3] == 2 + +### Field assignment (list) +ms2.s2 = [1] * 10 +for val in ms2.s2: + assert val == 1 + +### Field assignment (MemSizedArray) +array2 = Array(Num("B"), 10).pinned(jitter.vm) +jitter.vm.set_mem(array2.get_addr(), '\x02'*10) +for val in array2: + assert val == 2 +ms2.s2 = array2 +for val in ms2.s2: + assert val == 2 + + +# Inlining a MemType tests +class InStruct(MemStruct): + fields = [ + ("foo", Num("B")), + ("bar", Num("B")), + ] + +class ContStruct(MemStruct): + fields = [ + ("one", Num("B")), + ("instruct", InStruct.get_type()), + ("last", Num("B")), + ] + +cont = ContStruct(jitter.vm) +cont.memset() +assert len(cont) == 4 +assert len(cont.instruct) == 2 +assert cont.one == 0 +assert cont.last == 0 +assert cont.instruct.foo == 0 +assert cont.instruct.bar == 0 
+cont.memset('\x11') +assert cont.one == 0x11 +assert cont.last == 0x11 +assert cont.instruct.foo == 0x11 +assert cont.instruct.bar == 0x11 + +cont.one = 0x01 +cont.instruct.foo = 0x02 +cont.instruct.bar = 0x03 +cont.last = 0x04 +assert cont.one == 0x01 +assert cont.instruct.foo == 0x02 +assert cont.instruct.bar == 0x03 +assert cont.last == 0x04 +assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' + + +# Union test +class UniStruct(MemStruct): + fields = [ + ("one", Num("B")), + ("union", Union([ + ("instruct", InStruct.get_type()), + ("i", Num(">I")), + ])), + ("last", Num("B")), + ] + +uni = UniStruct(jitter.vm) +jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) +assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 +assert uni.one == 0x00 +assert uni.union.instruct.foo == 0x01 +assert uni.union.instruct.bar == 0x02 +assert uni.union.i == 0x01020304 +assert uni.last == 0x05 +uni.union.instruct.foo = 0x02 +assert uni.union.i == 0x02020304 +uni.union.i = 0x11223344 +assert uni.union.instruct.foo == 0x11 +assert uni.union.instruct.bar == 0x22 + + +# BitField test +class BitStruct(MemUnion): + fields = [ + ("flags_num", Num("H")), + ("flags", BitField(Num("H"), [ + ("f1_1", 1), + ("f2_5", 5), + ("f3_8", 8), + ("f4_1", 1), + ])), + ] + +bit = BitStruct(jitter.vm) +bit.memset() +assert bit.flags_num == 0 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0 +assert bit.flags.f3_8 == 0 +assert bit.flags.f4_1 == 0 +bit.flags.f1_1 = 1 +bit.flags.f2_5 = 0b10101 +bit.flags.f3_8 = 0b10000001 +assert bit.flags_num == 0b0010000001101011 +assert bit.flags.f1_1 == 1 +assert bit.flags.f2_5 == 0b10101 +assert bit.flags.f3_8 == 0b10000001 +assert bit.flags.f4_1 == 0 +bit.flags_num = 0b1101010101011100 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0b01110 +assert bit.flags.f3_8 == 0b01010101 +assert bit.flags.f4_1 == 1 + + +# Unhealthy ideas +class UnhealthyIdeas(MemStruct): + fields = [ + ("pastruct", Ptr("I", 
Array(RawStruct("=Bf")))), + ("apstr", Array(Ptr("I", Str()), 10)), + ("pself", Ptr("I", Self())), + ("apself", Array(Ptr("I", Self()), 2)), + ("ppself", Ptr("I", Ptr("I", Self()))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), + ] + +p_size = Ptr("I", Void()).size() + +ideas = UnhealthyIdeas(jitter.vm) +ideas.memset() +ideas.pself = ideas.get_addr() +assert ideas == ideas.pself.deref + +ideas.apself[0] = ideas.get_addr() +assert ideas.apself[0].deref == ideas +ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.apself[1].deref = ideas +assert ideas.apself[1] != ideas.get_addr() +assert ideas.apself[1].deref == ideas + +ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.ppself.deref.val = ideas.get_addr() +assert ideas.ppself.deref.val == ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.ppself.deref.deref = ideas +assert ideas.ppself.deref.val != ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.deref.val = ideas.get_addr() +assert ideas.pppself.deref.deref.deref == ideas + + +# Circular dependencies +class A(MemStruct): + pass + +class B(MemStruct): + fields = [("a", Ptr("I", A)),] + +# Gen A's fields after declaration +A.gen_fields([("b", Ptr("I", B)),]) + +a = A(jitter.vm) +b = B(jitter.vm) +a.b.val = b.get_addr() +b.a.val = a.get_addr() +assert a.b.deref == b +assert b.a.deref == a + + +# Cast tests +# MemStruct cast +MemInt = Num("I").pinned +MemShort = Num("H").pinned +dword = MemInt(jitter.vm) +dword.val = 0x12345678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).val == 0x5678 + +# Field cast +ms2.s2[0] = 0x34 +ms2.s2[1] = 0x12 +assert ms2.cast_field("s2", MemShort).val == 0x1234 + +# Other method +assert MemShort(jitter.vm, 
ms2.get_addr("s2")).val == 0x1234 + +# Manual cast inside an Array +ms2.s2[4] = 0xcd +ms2.s2[5] = 0xab +assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd + +# void* style cast +MemPtrVoid = Ptr("I", Void()).pinned +p = MemPtrVoid(jitter.vm) +p.val = mstruct.get_addr() +assert p.deref.cast(MyStruct) == mstruct +assert p.cast(Ptr("I", MyStruct)).deref == mstruct + +# Field equality tests +assert RawStruct("IH") == RawStruct("IH") +assert RawStruct("I") != RawStruct("IH") +assert Num("I") == Num("I") +assert Num(">I") != Num("I", MyStruct) != Ptr(" Date: Fri, 4 Dec 2015 10:16:37 +0100 Subject: MemStruct/Types: pinned renamed to lval --- example/jitter/types.py | 2 ++ miasm2/core/types.py | 79 ++++++++++++++++++++++++++++++++++--------------- test/core/types.py | 44 +++++++++++++-------------- 3 files changed, 79 insertions(+), 46 deletions(-) (limited to 'test/core') diff --git a/example/jitter/types.py b/example/jitter/types.py index 6d8543b4..f4a7ddb4 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -229,6 +229,8 @@ print "See that the original array has been modified:" print repr(data) print +# TODO: type manipulation examples + print "See test/core/types.py and the miasm2.core.types module doc for " print "more information." diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 3c8d5b8b..d6bc3cf5 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -1,5 +1,34 @@ -"""This module provides classes to manipulate C structures backed by a VmMngr -object (a miasm sandbox virtual memory). +"""This module provides classes to manipulate pure C types as well as their +representation in memory. 
A typical usecase is to use this module to +easily manipylate structures backed by a VmMngr object (a miasm sandbox virtual +memory): + + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ): - # FIXME: "I" => "u32" class MyStruct(MemStruct): fields = [ # Scalar field: just struct.pack field with one value @@ -210,10 +238,13 @@ class Type(object): return self._unpack(raw) @property - def pinned(self): + def lval(self): """Returns a class with a (vm, addr) constructor that allows to interact with this type in memory. + In compilation terms, it returns a class allowing to instanciate an + lvalue of this type. + @return: a MemType subclass. """ if self in DYN_MEM_STRUCT_CACHE: @@ -225,7 +256,7 @@ class Type(object): def _build_pinned_type(self): """Builds the MemType subclass allowing to interract with this type. - Called by self.pinned when it is not in cache. + Called by self.lval when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() pinned_type = type("Mem%r" % self, (pinned_base_class,), @@ -324,7 +355,7 @@ class Ptr(Num): in memory @dst_type: (MemType or Type) the MemType this Ptr points to. If a Type is given, it is transformed into a MemType with - TheType.pinned. + TheType.lval. *type_args, **type_kwargs: arguments to pass to the the pointed MemType when instanciating it (e.g. for MemStr encoding or MemArray field_type). @@ -342,10 +373,10 @@ class Ptr(Num): dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point + # dst_type.lval, which will only return a type that does not point # on MemSelf but on the right class only when _get_self_type of the # outer class has been replaced by _MetaMemStruct. 
- # In short, dst_type = dst_type.pinned is not valid here, it is done + # In short, dst_type = dst_type.lval is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args @@ -358,7 +389,7 @@ class Ptr(Num): else: raise ValueError("Unsupported usecase for MemSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned + self._dst_type = self._dst_type.lval @property def dst_type(self): @@ -374,7 +405,7 @@ class Ptr(Num): super(Ptr, self).set(vm, addr, val) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_val(self, vm, addr): """Get the numeric value of a Ptr""" @@ -429,14 +460,14 @@ class Struct(Type): Mapped to MemStruct. - NOTE: The `.pinned` property of Struct creates classes on the fly. If an + NOTE: The `.lval` property of Struct creates classes on the fly. If an equivalent structure is created by subclassing MemStruct, an exception is raised to prevent creating multiple classes designating the same type. Example: s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - Toto1 = s.pinned + Toto1 = s.lval # This raises an exception, because it describes the same structure as # Toto1 @@ -469,7 +500,7 @@ class Struct(Type): vm.set_mem(addr, raw) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_field(self, vm, addr, name): """Get a field value by @name and base structure @addr in @vm VmMngr.""" @@ -618,7 +649,7 @@ class Array(Type): "Assignment only implemented for list and MemSizedArray") def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def size(self): if self.is_sized(): @@ -949,7 +980,7 @@ class MemType(object): virtual memory. Globally, MemTypes are not meant to be used directly: specialized - subclasses are generated by Type(...).pinned and should be used instead. + subclasses are generated by Type(...).lval and should be used instead. 
The main exception is MemStruct, which you may want to subclass yourself for syntactic ease. """ @@ -1022,11 +1053,11 @@ class MemType(object): """Cast this MemType to another MemType (same address, same vm, but different type). Return the casted MemType. - @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr()) def cast_field(self, field, other_type, *type_args, **type_kwargs): @@ -1035,7 +1066,7 @@ class MemType(object): @field: field specification, for example its name for a struct, or an index in an array. See the subclass doc. - @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ raise NotImplementedError("Abstract") @@ -1127,7 +1158,7 @@ class MemStruct(MemType): assert mstruct.other.deref.foo == 0x1234 Note that: - MyStruct = Struct("MyStruct", ).pinned + MyStruct = Struct("MyStruct", ).lval is equivalent to the previous MyStruct declaration. See the various Type-s doc for more information. See MemStruct.gen_fields @@ -1166,7 +1197,7 @@ class MemStruct(MemType): def cast_field(self, field, other_type): """In this implementation, @field is a field name""" if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr(field)) # Field generation method, voluntarily public to be able to gen fields @@ -1214,7 +1245,7 @@ class MemStruct(MemType): "one. 
Use it instead.") # Register this class so that another one will not be created when - # calling cls._type.pinned + # calling cls._type.lval DYN_MEM_STRUCT_CACHE[cls._type] = cls cls._gen_attributes() @@ -1336,7 +1367,7 @@ class MemArray(MemType): It can be indexed for setting and getting elements, example: - array = Array(Num("I")).pinned(vm, addr)) + array = Array(Num("I")).lval(vm, addr)) array[2] = 5 array[4:8] = [0, 1, 2, 3] print array[20] @@ -1371,7 +1402,7 @@ class MemSizedArray(MemArray): """A fixed size MemArray. This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. + array which has a static size by using Array(type, size).lval. """ @property diff --git a/test/core/types.py b/test/core/types.py index db72449c..f1ff706b 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -119,7 +119,7 @@ assert memval == 8 # Str tests ## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) +memstr = Str().lval(jitter.vm, addr_str) memstr.val = "" assert memstr.val == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' @@ -138,13 +138,13 @@ assert mstruct.s.deref.val == "That's all folks!" assert memstr.val == "That's all folks!" ## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) +memstr2 = Str().lval(jitter.vm, addr_str2) memstr2.val = "That's all folks!" assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) +memstr3 = Str("utf16").lval(jitter.vm, addr_str3) memstr3.val = "That's all folks!" 
assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different @@ -156,7 +156,7 @@ assert memstr3.val == memstr.val # But the python value is the same # Array tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) +memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -188,8 +188,8 @@ except ValueError: pass -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types +memsarray = Array(Num("I"), 10).lval(jitter.vm) +# And Array(type, size).lval generates statically sized types assert memsarray.sizeof() == Num("I").size() * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc @@ -237,7 +237,7 @@ for val in ms2.s2: assert val == 1 ### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) +array2 = Array(Num("B"), 10).lval(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -406,8 +406,8 @@ assert b.a.deref == a # Cast tests # MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned +MemInt = Num("I").lval +MemShort = Num("H").lval dword = MemInt(jitter.vm) dword.val = 0x12345678 assert isinstance(dword.cast(MemShort), MemShort) @@ -427,7 +427,7 @@ ms2.s2[5] = 0xab assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast -MemPtrVoid = Ptr("I", Void()).pinned +MemPtrVoid = Ptr("I", Void()).lval p = MemPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct @@ -474,17 +474,17 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick MemField.pinned/MemField hash test -assert Num("f").pinned(jitter.vm, addr) == Num("f").pinned(jitter.vm, addr) +# 
Quick MemField.lval/MemField hash test +assert Num("f").lval(jitter.vm, addr) == Num("f").lval(jitter.vm, addr) # Types are cached -assert Num("f").pinned == Num("f").pinned -assert Num("d").pinned != Num("f").pinned -assert Union([("f1", Num("I")), ("f2", Num("H"))]).pinned == \ - Union([("f1", Num("I")), ("f2", Num("H"))]).pinned -assert Array(Num("B")).pinned == Array(Num("B")).pinned -assert Array(Num("I")).pinned != Array(Num("B")).pinned -assert Array(Num("B"), 20).pinned == Array(Num("B"), 20).pinned -assert Array(Num("B"), 19).pinned != Array(Num("B"), 20).pinned +assert Num("f").lval == Num("f").lval +assert Num("d").lval != Num("f").lval +assert Union([("f1", Num("I")), ("f2", Num("H"))]).lval == \ + Union([("f1", Num("I")), ("f2", Num("H"))]).lval +assert Array(Num("B")).lval == Array(Num("B")).lval +assert Array(Num("I")).lval != Array(Num("B")).lval +assert Array(Num("B"), 20).lval == Array(Num("B"), 20).lval +assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval # Repr tests @@ -496,8 +496,8 @@ print repr(cont), '\n' print repr(uni), '\n' print repr(bit), '\n' print repr(ideas), '\n' -print repr(Array(MyStruct2.get_type(), 2).pinned(jitter.vm, addr)), '\n' -print repr(Num("f").pinned(jitter.vm, addr)), '\n' +print repr(Array(MyStruct2.get_type(), 2).lval(jitter.vm, addr)), '\n' +print repr(Num("f").lval(jitter.vm, addr)), '\n' print repr(memarray) print repr(memsarray) print repr(memstr) -- cgit 1.4.1 From 36cae74bff4674396b35a208bc7ac57f0d4e2b6b Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sat, 5 Dec 2015 14:44:23 +0100 Subject: Types: Type size is now a property --- example/jitter/types.py | 2 +- miasm2/core/types.py | 35 +++++++++++++++++++++-------------- test/core/types.py | 10 +++++----- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'test/core') diff --git a/example/jitter/types.py b/example/jitter/types.py index 6c0b59af..e714372c 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -171,7 +171,7 
@@ link.push(DataArray(vm)) assert link.size == 3 # If you get it directly from the VM, it is updated as well raw_size = vm.get_mem(link.get_addr("size"), link.get_type() - .get_field_type("size").size()) + .get_field_type("size").size) assert raw_size == '\x03\x00\x00\x00' print "The linked list just built:" diff --git a/miasm2/core/types.py b/miasm2/core/types.py index bf8f7823..03d23a5c 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -262,7 +262,7 @@ class Type(object): def get(self, vm, addr): """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) + raw = vm.get_mem(addr, self.size) return self._unpack(raw) @property @@ -306,12 +306,13 @@ class Type(object): """ self._self_type = self_type + @property def size(self): """Return the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): - return self.size() + return self.size def __neq__(self, other): return not self == other @@ -332,6 +333,7 @@ class RawStruct(Type): def _unpack(self, raw_str): return struct.unpack(self._fmt, raw_str) + @property def size(self): return struct.calcsize(self._fmt) @@ -517,7 +519,7 @@ class Struct(Type): # For reflexion field._set_self_type(self) self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() + offset += field.size @property def fields(self): @@ -549,8 +551,9 @@ class Struct(Type): offset = self.get_offset(name) field.set(vm, addr + offset, val) + @property def size(self): - return sum(field.size() for _, field in self.fields) + return sum(field.size for _, field in self.fields) def get_offset(self, field_name): """ @@ -609,8 +612,9 @@ class Union(Struct): """@field_list: a [(name, field)] list, see the class doc""" super(Union, self).__init__("union", field_list) + @property def size(self): - return max(field.size() for _, field in self.fields) + return max(field.size for _, field in self.fields) def get_offset(self, 
field_name): return 0 @@ -658,7 +662,7 @@ class Array(Type): def set(self, vm, addr, val): # MemSizedArray assignment if isinstance(val, MemSizedArray): - if val.array_len != self.array_len or len(val) != self.size(): + if val.array_len != self.array_len or len(val) != self.size: raise ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) @@ -670,7 +674,7 @@ class Array(Type): offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) - offset += self.field_type.size() + offset += self.field_type.size else: raise RuntimeError( @@ -679,6 +683,7 @@ class Array(Type): def get(self, vm, addr): return self.lval(vm, addr) + @property def size(self): if self.is_sized(): return self.get_offset(self.array_len) @@ -688,7 +693,7 @@ class Array(Type): def get_offset(self, idx): """Returns the offset of the item at index @idx.""" - return self.field_type.size() * idx + return self.field_type.size * idx def get_item(self, vm, addr, idx): """Get the item(s) at index @idx. 
@@ -745,7 +750,7 @@ class Array(Type): def _check_bounds(self, idx): if not isinstance(idx, (int, long)): raise ValueError("index must be an int or a long") - if idx < 0 or (self.is_sized() and idx >= self.size()): + if idx < 0 or (self.is_sized() and idx >= self.size): raise IndexError("Index %s out of bounds" % idx) def _get_pinned_base_class(self): @@ -786,7 +791,7 @@ class Bits(Type): def set(self, vm, addr, val): val_mask = (1 << self._bits) - 1 val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 + num_size = self._num.size * 8 full_num_mask = (1 << num_size) - 1 num_mask = (~(val_mask << self._bit_offset)) & full_num_mask @@ -801,8 +806,9 @@ class Bits(Type): res_val = (num_val >> self._bit_offset) & val_mask return res_val + @property def size(self): - return self._num.size() + return self._num.size @property def bit_size(self): @@ -868,7 +874,7 @@ class BitField(Union): for name, bits in bit_list: fields.append((name, Bits(self._num, bits, offset))) offset += bits - if offset > self._num.size() * 8: + if offset > self._num.size * 8: raise ValueError("sum of bit lengths is > to the backing num size") super(BitField, self).__init__(fields) @@ -968,6 +974,7 @@ class Str(Type): set_str = self.encodings[self.enc][1] set_str(vm, addr, s) + @property def size(self): """This type is unsized.""" raise ValueError("Str is unsized") @@ -1130,7 +1137,7 @@ class MemType(object): """Return the static size of this type. By default, it is the size of the underlying Type. """ - return cls._type.size() + return cls._type.size def get_size(self): """Return the dynamic size of this structure (e.g. 
the size of an @@ -1518,7 +1525,7 @@ class MemSizedArray(MemArray): return self.get_type().array_len def get_size(self): - return self.get_type().size() + return self.get_type().size def __iter__(self): for i in xrange(self.get_type().array_len): diff --git a/test/core/types.py b/test/core/types.py index f1ff706b..de6034ef 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -160,11 +160,11 @@ memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' + Num("I").size) == '\x02\x00\x00\x00' memarray[2] = 0xbbbbbbbb assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size, + Num("I").size) == '\xbb\xbb\xbb\xbb' try: s = str(memarray) assert False, "Should raise" @@ -190,7 +190,7 @@ except ValueError: memsarray = Array(Num("I"), 10).lval(jitter.vm) # And Array(type, size).lval generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 +assert memsarray.sizeof() == Num("I").size * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc assert len(memsarray) == 10 * 4 @@ -356,7 +356,7 @@ class UnhealthyIdeas(MemStruct): ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), ] -p_size = Ptr("I", Void()).size() +p_size = Ptr("I", Void()).size ideas = UnhealthyIdeas(jitter.vm) ideas.memset() -- cgit 1.4.1 From e9ab0bd0f9c6dde642904cb473d57de9c81747b5 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sat, 5 Dec 2015 15:21:23 +0100 Subject: Types: short test to assert MemStruct unicity --- test/core/types.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'test/core') diff --git a/test/core/types.py b/test/core/types.py index de6034ef..96765fe7 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -486,6 +486,10 @@ assert Array(Num("I")).lval != 
Array(Num("B")).lval assert Array(Num("B"), 20).lval == Array(Num("B"), 20).lval assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval +# MemStruct unicity test +assert MyStruct == Struct(MyStruct.__name__, MyStruct.fields).lval +assert MyStruct.get_type() == Struct(MyStruct.__name__, MyStruct.fields) + # Repr tests -- cgit 1.4.1 From 0379f8e91fa54fe641948f01bb98a76fab47033a Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 14 Dec 2015 11:12:42 +0100 Subject: Types: adding the ("field", SomeMemType) syntax Shorthand for ("field", SomeMemStruct.get_type()) in a Struct or MemStruct fields definition. --- miasm2/core/types.py | 23 ++++++++++++++++++----- test/core/types.py | 5 +++-- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'test/core') diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 03d23a5c..8d9687eb 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -507,19 +507,32 @@ class Struct(Type): def __init__(self, name, fields): self.name = name - # fields is immutable - self._fields = tuple(fields) - self._gen_fields() + # generates self._fields and self._fields_desc + self._gen_fields(fields) - def _gen_fields(self): + def _gen_fields(self, fields): """Precompute useful metadata on self.fields.""" self._fields_desc = {} offset = 0 - for name, field in self._fields: + + # Build a proper (name, Field()) list, handling cases where the user + # supplies a MemType subclass instead of a Type instance + real_fields = [] + for name, field in fields: + if isinstance(field, type) and issubclass(field, MemType): + if field._type is None: + raise ValueError("%r has no static type; use a subclasses " + "with a non null _type or use a " + "Type instance") + field = field.get_type() + real_fields.append((name, field)) + # For reflexion field._set_self_type(self) self._fields_desc[name] = {"field": field, "offset": offset} offset += field.size + # fields is immutable + self._fields = tuple(real_fields) @property def 
fields(self): diff --git a/test/core/types.py b/test/core/types.py index 96765fe7..c59a68d6 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -256,7 +256,8 @@ class InStruct(MemStruct): class ContStruct(MemStruct): fields = [ ("one", Num("B")), - ("instruct", InStruct.get_type()), + # Shorthand for: ("instruct", InStruct.get_type()), + ("instruct", InStruct), ("last", Num("B")), ] @@ -290,7 +291,7 @@ class UniStruct(MemStruct): fields = [ ("one", Num("B")), ("union", Union([ - ("instruct", InStruct.get_type()), + ("instruct", InStruct), ("i", Num(">I")), ])), ("last", Num("B")), -- cgit 1.4.1 From 03b3a84e0dd4d4d01b471f1767d4aec68b9a90ad Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Tue, 15 Dec 2015 16:35:31 +0100 Subject: Types: Support anonymous Struct/Union/BitField See the test addition for an example. A Struct, Union, or BitField field with no name will be considered anonymous: all its fields will be added to the parent Struct/Union/BitField. This implements this kind of C declaration: struct foo { int a; union { int bar; struct { short baz; short foz; }; }; } --- miasm2/core/types.py | 83 +++++++++++++++++++++++++++++++++++++++++++++------- test/core/types.py | 27 +++++++++++++++++ 2 files changed, 99 insertions(+), 11 deletions(-) (limited to 'test/core') diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 8d9687eb..4bab3bde 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -370,7 +370,7 @@ class Ptr(Num): MemType. Mapped to MemPtr (see its doc for more info): - + assert isinstance(mystruct.ptr, MemPtr) mystruct.ptr = 0x4000 # Assign the Ptr numeric value mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value @@ -503,6 +503,24 @@ class Struct(Type): # Toto1 class Toto(MemStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] + + Anonymous Struct, Union or BitField can be used if their field name + evaluates to False ("" or None). 
Such anonymous Struct field will generate + fields to the parent Struct, e.g.: + bla = Struct("Bla", [ + ("a", Num("B")), + ("", Union([("b1", Num("B")), ("b2", Num("H"))])), + ("", Struct("", [("c1", Num("B")), ("c2", Num("B"))])), + ] + Will have a b1, b2 and c1, c2 field directly accessible. The anonymous + fields are renamed to "__anon_<num>", with an incremented number. + + In such case, bla.fields will not contain b1, b2, c1 and c2 (only the 3 + actual fields, with the anonymous ones renamed), but bla.all_fields will + return the 3 fields + b1, b2, c1 and c2 (and an information telling if it + has been generated from an anonymous Struct/Union). + + bla.get_field(vm, addr, "b1") will work. """ def __init__(self, name, fields): @@ -518,24 +536,67 @@ class Struct(Type): # Build a proper (name, Field()) list, handling cases where the user # supplies a MemType subclass instead of a Type instance real_fields = [] - for name, field in fields: + uniq_count = 0 + for fname, field in fields: if isinstance(field, type) and issubclass(field, MemType): if field._type is None: raise ValueError("%r has no static type; use a subclasses " "with a non null _type or use a " "Type instance") field = field.get_type() - real_fields.append((name, field)) # For reflexion field._set_self_type(self) - self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size + + # Anonymous Struct/Union + if not fname and isinstance(field, Struct): + # Generate field information + updated_fields = { + name: { + # Same field type than the anon field subfield + 'field': fd['field'], + # But the current offset is added + 'offset': fd['offset'] + offset, + } + for name, fd in field._fields_desc.iteritems() + } + + # Add the newly generated fields from the anon field + self._fields_desc.update(updated_fields) + real_fields += [(name, fld, True) + for name, fld in field.fields] + + # Rename the anonymous field + fname = '__anon_%x' % uniq_count + uniq_count += 1 + + 
self._fields_desc[fname] = {"field": field, "offset": offset} + real_fields.append((fname, field, False)) + offset = self._next_offset(field, offset) + # fields is immutable self._fields = tuple(real_fields) + def _next_offset(self, field, orig_offset): + return orig_offset + field.size + @property def fields(self): + """Returns a sequence of (name, field) describing the fields of this + Struct, in order of offset. + + Fields generated from anonymous Unions or Structs are excluded from + this sequence. + """ + return tuple((name, field) for name, field, anon in self._fields + if not anon) + + @property + def all_fields(self): + """Returns a sequence of (<name>, <field>, <is_anon>), + where is_anon is True when a field is generated from an anonymous + Struct or Union, and False for the fields that have been provided as is. + """ return self._fields def set(self, vm, addr, val): @@ -600,7 +661,7 @@ class Struct(Type): class Union(Struct): """Represents a C union. - + Allows to put multiple fields at the same offset in a MemStruct, similar to unions in C. The Union will have the size of the largest of its fields. @@ -629,8 +690,8 @@ class Union(Struct): def size(self): return max(field.size for _, field in self.fields) - def get_offset(self, field_name): - return 0 + def _next_offset(self, field, orig_offset): + return orig_offset def _get_pinned_base_class(self): return MemUnion @@ -1019,7 +1080,7 @@ class Str(Type): class Void(Type): """Represents the C void type. - + Mapped to MemVoid. """ @@ -1035,7 +1096,7 @@ class Void(Type): class Self(Void): """Special marker to reference a type inside itself. - + Mapped to MemSelf. Example: @@ -1375,7 +1436,7 @@ class MemStruct(MemType): @classmethod def _gen_attributes(cls): # Generate self.<name> 
getter and setters - for name, field in cls._type.fields: + for name, field, _ in cls._type.all_fields: setattr(cls, name, property( lambda self, name=name: self.get_field(name), lambda self, val, name=name: self.set_field(name, val) diff --git a/test/core/types.py b/test/core/types.py index c59a68d6..7ad8ad13 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -491,6 +491,33 @@ assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval assert MyStruct == Struct(MyStruct.__name__, MyStruct.fields).lval assert MyStruct.get_type() == Struct(MyStruct.__name__, MyStruct.fields) +# Anonymous Unions +class Anon(MemStruct): + fields = [ + ("a", Num("B")), + # If a field name evaluates to False ("" or None for example) and the + # field type is a Struct subclass (Struct, Union, BitField), the field + # is considered as an anonymous struct or union. Therefore, Anon will + # have b1, b2 and c1, c2 attributes in that case. + ("", Union([("b1", Num("B")), ("b2", Num("H"))])), + ("", Struct("", [("c1", Num("B")), ("c2", Num("B"))])), + ("d", Num("B")), + ] + +anon = Anon(jitter.vm) +anon.memset() +anon.a = 0x07 +anon.b2 = 0x0201 +anon.c1 = 0x55 +anon.c2 = 0x77 +anon.d = 0x33 +assert anon.a == 0x07 +assert anon.b1 == 0x01 +assert anon.b2 == 0x0201 +assert anon.c1 == 0x55 +assert anon.c2 == 0x77 +assert anon.d == 0x33 + # Repr tests -- cgit 1.4.1 From e6ec6f9d800584234301733340b171092aac3f9a Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 21 Dec 2015 11:29:37 +0100 Subject: Types: typo, MemStruct.get_offset should be a classmethod Also added tests and MemArray.get_offset --- miasm2/core/types.py | 10 ++++++++-- test/core/types.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'test/core') diff --git a/miasm2/core/types.py b/miasm2/core/types.py index b6eaf0e4..bb7536e5 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -1362,9 +1362,10 @@ class MemStruct(MemType): offset = 0 return self._addr + offset - def 
get_offset(self, field_name): + @classmethod + def get_offset(cls, field_name): """Shorthand for self.get_type().get_offset(field_name).""" - return self._type.get_offset(field_name) + return cls.get_type().get_offset(field_name) def get_field(self, name): """Get a field value by name. @@ -1575,6 +1576,11 @@ class MemArray(MemType): def get_addr(self, idx=0): return self._addr + self.get_type().get_offset(idx) + @classmethod + def get_offset(cls, idx): + """Shorthand for self.get_type().get_offset(idx).""" + return cls.get_type().get_offset(idx) + def __getitem__(self, idx): return self.get_type().get_item(self._vm, self._addr, idx) diff --git a/test/core/types.py b/test/core/types.py index 7ad8ad13..bb1d5da1 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -518,6 +518,17 @@ assert anon.c1 == 0x55 assert anon.c2 == 0x77 assert anon.d == 0x33 +# get_offset +for field, off in (("a", 0), ("b1", 1), ("b2", 1), ("c1", 3), ("c2", 4), + ("d", 5)): + assert Anon.get_offset(field) == Anon.get_type().get_offset(field) + assert Anon.get_offset(field) == off + +arr_t = Array(Num("H")) +for idx, off in ((0, 0), (1, 2), (30, 60)): + assert arr_t.get_offset(idx) == arr_t.lval.get_offset(idx) + assert arr_t.get_offset(idx) == off + # Repr tests -- cgit 1.4.1