From ba2df16277d7d4deae118ed11e1e92cd478045ec Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 16:00:26 +0100 Subject: MemStruct/Types: Renamed analysis.mem to core.types --- example/jitter/types.py | 234 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 example/jitter/types.py (limited to 'example/jitter/types.py') diff --git a/example/jitter/types.py b/example/jitter/types.py new file mode 100644 index 00000000..6d8543b4 --- /dev/null +++ b/example/jitter/types.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python +"""This script is just a short example of common usages for miasm2.core.types. +For a more complete view of what is possible, tests/core/types.py covers +most of the module possibilities, and the module doc gives useful information +as well. +""" + +from miasm2.analysis.machine import Machine +from miasm2.core.types import MemStruct, Self, Void, Str, Array, Ptr, \ + Num, Array, set_allocator +from miasm2.os_dep.common import heap + +# Instanciate a heap +my_heap = heap() +# And set it as the default memory allocator, to avoid manual allocation and +# explicit address passing to the MemType subclasses (like MemStruct) +# constructor +set_allocator(my_heap.vm_alloc) + +# Let's reimplement a simple C generic linked list mapped on a VmMngr. 
+ +# All the structures and methods will use the python objects but all the data +# is in fact stored in the VmMngr + +class ListNode(MemStruct): + fields = [ + # The " Date: Fri, 4 Dec 2015 10:16:37 +0100 Subject: MemStruct/Types: pinned renamed to lval --- example/jitter/types.py | 2 ++ miasm2/core/types.py | 79 ++++++++++++++++++++++++++++++++++--------------- test/core/types.py | 44 +++++++++++++-------------- 3 files changed, 79 insertions(+), 46 deletions(-) (limited to 'example/jitter/types.py') diff --git a/example/jitter/types.py b/example/jitter/types.py index 6d8543b4..f4a7ddb4 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -229,6 +229,8 @@ print "See that the original array has been modified:" print repr(data) print +# TODO: type manipulation examples + print "See test/core/types.py and the miasm2.core.types module doc for " print "more information." diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 3c8d5b8b..d6bc3cf5 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -1,5 +1,34 @@ -"""This module provides classes to manipulate C structures backed by a VmMngr -object (a miasm sandbox virtual memory). +"""This module provides classes to manipulate pure C types as well as their +representation in memory. A typical usecase is to use this module to +easily manipulate structures backed by a VmMngr object (a miasm sandbox virtual +memory): + + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ): - # FIXME: "I" => "u32" class MyStruct(MemStruct): fields = [ # Scalar field: just struct.pack field with one value @@ -210,10 +238,13 @@ class Type(object): return self._unpack(raw) @property - def pinned(self): + def lval(self): """Returns a class with a (vm, addr) constructor that allows to interact with this type in memory. + In compilation terms, it returns a class allowing to instantiate an + lvalue of this type. + @return: a MemType subclass. 
""" if self in DYN_MEM_STRUCT_CACHE: @@ -225,7 +256,7 @@ class Type(object): def _build_pinned_type(self): """Builds the MemType subclass allowing to interract with this type. - Called by self.pinned when it is not in cache. + Called by self.lval when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() pinned_type = type("Mem%r" % self, (pinned_base_class,), @@ -324,7 +355,7 @@ class Ptr(Num): in memory @dst_type: (MemType or Type) the MemType this Ptr points to. If a Type is given, it is transformed into a MemType with - TheType.pinned. + TheType.lval. *type_args, **type_kwargs: arguments to pass to the the pointed MemType when instanciating it (e.g. for MemStr encoding or MemArray field_type). @@ -342,10 +373,10 @@ class Ptr(Num): dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point + # dst_type.lval, which will only return a type that does not point # on MemSelf but on the right class only when _get_self_type of the # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.pinned is not valid here, it is done + # In short, dst_type = dst_type.lval is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args @@ -358,7 +389,7 @@ class Ptr(Num): else: raise ValueError("Unsupported usecase for MemSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned + self._dst_type = self._dst_type.lval @property def dst_type(self): @@ -374,7 +405,7 @@ class Ptr(Num): super(Ptr, self).set(vm, addr, val) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_val(self, vm, addr): """Get the numeric value of a Ptr""" @@ -429,14 +460,14 @@ class Struct(Type): Mapped to MemStruct. 
- NOTE: The `.pinned` property of Struct creates classes on the fly. If an + NOTE: The `.lval` property of Struct creates classes on the fly. If an equivalent structure is created by subclassing MemStruct, an exception is raised to prevent creating multiple classes designating the same type. Example: s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - Toto1 = s.pinned + Toto1 = s.lval # This raises an exception, because it describes the same structure as # Toto1 @@ -469,7 +500,7 @@ class Struct(Type): vm.set_mem(addr, raw) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_field(self, vm, addr, name): """Get a field value by @name and base structure @addr in @vm VmMngr.""" @@ -618,7 +649,7 @@ class Array(Type): "Assignment only implemented for list and MemSizedArray") def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def size(self): if self.is_sized(): @@ -949,7 +980,7 @@ class MemType(object): virtual memory. Globally, MemTypes are not meant to be used directly: specialized - subclasses are generated by Type(...).pinned and should be used instead. + subclasses are generated by Type(...).lval and should be used instead. The main exception is MemStruct, which you may want to subclass yourself for syntactic ease. """ @@ -1022,11 +1053,11 @@ class MemType(object): """Cast this MemType to another MemType (same address, same vm, but different type). Return the casted MemType. - @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr()) def cast_field(self, field, other_type, *type_args, **type_kwargs): @@ -1035,7 +1066,7 @@ class MemType(object): @field: field specification, for example its name for a struct, or an index in an array. 
See the subclass doc. - @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ raise NotImplementedError("Abstract") @@ -1127,7 +1158,7 @@ class MemStruct(MemType): assert mstruct.other.deref.foo == 0x1234 Note that: - MyStruct = Struct("MyStruct", ).pinned + MyStruct = Struct("MyStruct", ).lval is equivalent to the previous MyStruct declaration. See the various Type-s doc for more information. See MemStruct.gen_fields @@ -1166,7 +1197,7 @@ class MemStruct(MemType): def cast_field(self, field, other_type): """In this implementation, @field is a field name""" if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr(field)) # Field generation method, voluntarily public to be able to gen fields @@ -1214,7 +1245,7 @@ class MemStruct(MemType): "one. Use it instead.") # Register this class so that another one will not be created when - # calling cls._type.pinned + # calling cls._type.lval DYN_MEM_STRUCT_CACHE[cls._type] = cls cls._gen_attributes() @@ -1336,7 +1367,7 @@ class MemArray(MemType): It can be indexed for setting and getting elements, example: - array = Array(Num("I")).pinned(vm, addr)) + array = Array(Num("I")).lval(vm, addr)) array[2] = 5 array[4:8] = [0, 1, 2, 3] print array[20] @@ -1371,7 +1402,7 @@ class MemSizedArray(MemArray): """A fixed size MemArray. This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. + array which has a static size by using Array(type, size).lval. 
""" @property diff --git a/test/core/types.py b/test/core/types.py index db72449c..f1ff706b 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -119,7 +119,7 @@ assert memval == 8 # Str tests ## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) +memstr = Str().lval(jitter.vm, addr_str) memstr.val = "" assert memstr.val == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' @@ -138,13 +138,13 @@ assert mstruct.s.deref.val == "That's all folks!" assert memstr.val == "That's all folks!" ## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) +memstr2 = Str().lval(jitter.vm, addr_str2) memstr2.val = "That's all folks!" assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) +memstr3 = Str("utf16").lval(jitter.vm, addr_str3) memstr3.val = "That's all folks!" assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different @@ -156,7 +156,7 @@ assert memstr3.val == memstr.val # But the python value is the same # Array tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) +memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -188,8 +188,8 @@ except ValueError: pass -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types +memsarray = Array(Num("I"), 10).lval(jitter.vm) +# And Array(type, size).lval generates statically sized types assert memsarray.sizeof() == Num("I").size() * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc @@ -237,7 +237,7 @@ for val in ms2.s2: assert val == 1 ### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) +array2 = Array(Num("B"), 
10).lval(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -406,8 +406,8 @@ assert b.a.deref == a # Cast tests # MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned +MemInt = Num("I").lval +MemShort = Num("H").lval dword = MemInt(jitter.vm) dword.val = 0x12345678 assert isinstance(dword.cast(MemShort), MemShort) @@ -427,7 +427,7 @@ ms2.s2[5] = 0xab assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast -MemPtrVoid = Ptr("I", Void()).pinned +MemPtrVoid = Ptr("I", Void()).lval p = MemPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct @@ -474,17 +474,17 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick MemField.pinned/MemField hash test -assert Num("f").pinned(jitter.vm, addr) == Num("f").pinned(jitter.vm, addr) +# Quick MemField.lval/MemField hash test +assert Num("f").lval(jitter.vm, addr) == Num("f").lval(jitter.vm, addr) # Types are cached -assert Num("f").pinned == Num("f").pinned -assert Num("d").pinned != Num("f").pinned -assert Union([("f1", Num("I")), ("f2", Num("H"))]).pinned == \ - Union([("f1", Num("I")), ("f2", Num("H"))]).pinned -assert Array(Num("B")).pinned == Array(Num("B")).pinned -assert Array(Num("I")).pinned != Array(Num("B")).pinned -assert Array(Num("B"), 20).pinned == Array(Num("B"), 20).pinned -assert Array(Num("B"), 19).pinned != Array(Num("B"), 20).pinned +assert Num("f").lval == Num("f").lval +assert Num("d").lval != Num("f").lval +assert Union([("f1", Num("I")), ("f2", Num("H"))]).lval == \ + Union([("f1", Num("I")), ("f2", Num("H"))]).lval +assert Array(Num("B")).lval == Array(Num("B")).lval +assert Array(Num("I")).lval != Array(Num("B")).lval +assert Array(Num("B"), 20).lval == Array(Num("B"), 20).lval +assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval # Repr tests @@ -496,8 +496,8 @@ print repr(cont), '\n' print repr(uni), 
'\n' print repr(bit), '\n' print repr(ideas), '\n' -print repr(Array(MyStruct2.get_type(), 2).pinned(jitter.vm, addr)), '\n' -print repr(Num("f").pinned(jitter.vm, addr)), '\n' +print repr(Array(MyStruct2.get_type(), 2).lval(jitter.vm, addr)), '\n' +print repr(Num("f").lval(jitter.vm, addr)), '\n' print repr(memarray) print repr(memsarray) print repr(memstr) -- cgit 1.4.1 From bd85479c0c7aba2d9ec9aeb2e6b2c4bb4b54d3e0 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Fri, 4 Dec 2015 11:05:51 +0100 Subject: MemStruct/Types: example on type manipulations --- example/jitter/types.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'example/jitter/types.py') diff --git a/example/jitter/types.py b/example/jitter/types.py index f4a7ddb4..6c0b59af 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -229,7 +229,29 @@ print "See that the original array has been modified:" print repr(data) print -# TODO: type manipulation examples +# Some type manipulation examples, for example let's construct an argv for +# a program: +# Let's say that we have two arguments, +1 for the program name and +1 for the +# final null ptr in argv, the array has 4 elements: +argv_t = Array(Ptr(" Date: Sat, 5 Dec 2015 14:44:23 +0100 Subject: Types: Type size is now a property --- example/jitter/types.py | 2 +- miasm2/core/types.py | 35 +++++++++++++++++++++-------------- test/core/types.py | 10 +++++----- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'example/jitter/types.py') diff --git a/example/jitter/types.py b/example/jitter/types.py index 6c0b59af..e714372c 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -171,7 +171,7 @@ link.push(DataArray(vm)) assert link.size == 3 # If you get it directly from the VM, it is updated as well raw_size = vm.get_mem(link.get_addr("size"), link.get_type() - .get_field_type("size").size()) + .get_field_type("size").size) assert raw_size == '\x03\x00\x00\x00' print 
"The linked list just built:" diff --git a/miasm2/core/types.py b/miasm2/core/types.py index bf8f7823..03d23a5c 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -262,7 +262,7 @@ class Type(object): def get(self, vm, addr): """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) + raw = vm.get_mem(addr, self.size) return self._unpack(raw) @property @@ -306,12 +306,13 @@ class Type(object): """ self._self_type = self_type + @property def size(self): """Return the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): - return self.size() + return self.size def __neq__(self, other): return not self == other @@ -332,6 +333,7 @@ class RawStruct(Type): def _unpack(self, raw_str): return struct.unpack(self._fmt, raw_str) + @property def size(self): return struct.calcsize(self._fmt) @@ -517,7 +519,7 @@ class Struct(Type): # For reflexion field._set_self_type(self) self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() + offset += field.size @property def fields(self): @@ -549,8 +551,9 @@ class Struct(Type): offset = self.get_offset(name) field.set(vm, addr + offset, val) + @property def size(self): - return sum(field.size() for _, field in self.fields) + return sum(field.size for _, field in self.fields) def get_offset(self, field_name): """ @@ -609,8 +612,9 @@ class Union(Struct): """@field_list: a [(name, field)] list, see the class doc""" super(Union, self).__init__("union", field_list) + @property def size(self): - return max(field.size() for _, field in self.fields) + return max(field.size for _, field in self.fields) def get_offset(self, field_name): return 0 @@ -658,7 +662,7 @@ class Array(Type): def set(self, vm, addr, val): # MemSizedArray assignment if isinstance(val, MemSizedArray): - if val.array_len != self.array_len or len(val) != self.size(): + if val.array_len != self.array_len or len(val) != self.size: raise 
ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) @@ -670,7 +674,7 @@ class Array(Type): offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) - offset += self.field_type.size() + offset += self.field_type.size else: raise RuntimeError( @@ -679,6 +683,7 @@ class Array(Type): def get(self, vm, addr): return self.lval(vm, addr) + @property def size(self): if self.is_sized(): return self.get_offset(self.array_len) @@ -688,7 +693,7 @@ class Array(Type): def get_offset(self, idx): """Returns the offset of the item at index @idx.""" - return self.field_type.size() * idx + return self.field_type.size * idx def get_item(self, vm, addr, idx): """Get the item(s) at index @idx. @@ -745,7 +750,7 @@ class Array(Type): def _check_bounds(self, idx): if not isinstance(idx, (int, long)): raise ValueError("index must be an int or a long") - if idx < 0 or (self.is_sized() and idx >= self.size()): + if idx < 0 or (self.is_sized() and idx >= self.size): raise IndexError("Index %s out of bounds" % idx) def _get_pinned_base_class(self): @@ -786,7 +791,7 @@ class Bits(Type): def set(self, vm, addr, val): val_mask = (1 << self._bits) - 1 val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 + num_size = self._num.size * 8 full_num_mask = (1 << num_size) - 1 num_mask = (~(val_mask << self._bit_offset)) & full_num_mask @@ -801,8 +806,9 @@ class Bits(Type): res_val = (num_val >> self._bit_offset) & val_mask return res_val + @property def size(self): - return self._num.size() + return self._num.size @property def bit_size(self): @@ -868,7 +874,7 @@ class BitField(Union): for name, bits in bit_list: fields.append((name, Bits(self._num, bits, offset))) offset += bits - if offset > self._num.size() * 8: + if offset > self._num.size * 8: raise ValueError("sum of bit lengths is > to the backing num size") super(BitField, self).__init__(fields) @@ -968,6 +974,7 @@ class Str(Type): set_str = 
self.encodings[self.enc][1] set_str(vm, addr, s) + @property def size(self): """This type is unsized.""" raise ValueError("Str is unsized") @@ -1130,7 +1137,7 @@ class MemType(object): """Return the static size of this type. By default, it is the size of the underlying Type. """ - return cls._type.size() + return cls._type.size def get_size(self): """Return the dynamic size of this structure (e.g. the size of an @@ -1518,7 +1525,7 @@ class MemSizedArray(MemArray): return self.get_type().array_len def get_size(self): - return self.get_type().size() + return self.get_type().size def __iter__(self): for i in xrange(self.get_type().array_len): diff --git a/test/core/types.py b/test/core/types.py index f1ff706b..de6034ef 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -160,11 +160,11 @@ memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' + Num("I").size) == '\x02\x00\x00\x00' memarray[2] = 0xbbbbbbbb assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size, + Num("I").size) == '\xbb\xbb\xbb\xbb' try: s = str(memarray) assert False, "Should raise" @@ -190,7 +190,7 @@ except ValueError: memsarray = Array(Num("I"), 10).lval(jitter.vm) # And Array(type, size).lval generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 +assert memsarray.sizeof() == Num("I").size * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc assert len(memsarray) == 10 * 4 @@ -356,7 +356,7 @@ class UnhealthyIdeas(MemStruct): ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), ] -p_size = Ptr("I", Void()).size() +p_size = Ptr("I", Void()).size ideas = UnhealthyIdeas(jitter.vm) ideas.memset() -- cgit 1.4.1 From b421c5059f2a7bfd47b5bdf4c3488a3e8bda5b40 Mon Sep 17 00:00:00 
2001 From: Florent Monjalet Date: Mon, 18 Jan 2016 14:04:20 +0100 Subject: Types: misleading comment correction in example --- example/jitter/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'example/jitter/types.py') diff --git a/example/jitter/types.py b/example/jitter/types.py index e714372c..c37c3b84 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -239,7 +239,7 @@ print "3 arguments argv type:", argv_t # alloc argv somewhere argv = argv_t.lval(vm) -# Auto alloc with a buffer type +# Auto alloc with the MemStr.from_str helper MemStrAnsi = Str().lval argv[0].val = MemStrAnsi.from_str(vm, "./my-program").get_addr() argv[1].val = MemStrAnsi.from_str(vm, "arg1").get_addr() -- cgit 1.4.1