From 6e635113b53f932573687f9a6e3fc227cde6c0d9 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sat, 7 Nov 2015 20:06:21 +0100 Subject: Introducing MemStruct feature in miasm2.analysis.mem --- test/analysis/mem.py | 440 +++++++++++++++++++++++++++++++++++++++++++++++++++ test/test_all.py | 2 + 2 files changed, 442 insertions(+) create mode 100644 test/analysis/mem.py (limited to 'test') diff --git a/test/analysis/mem.py b/test/analysis/mem.py new file mode 100644 index 00000000..4b306e67 --- /dev/null +++ b/test/analysis/mem.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python + +# miasm2.analysis.mem tests + + +from miasm2.analysis.machine import Machine +from miasm2.analysis.mem import * +from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE + +# Two structures with some fields +class OtherStruct(MemStruct): + fields = [ + ("foo", Num("H")), + ] + +class MyStruct(MemStruct): + fields = [ + # Integer field: just struct.pack fields with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields are Int, but they can also be dereferenced + # (self.deref_). Deref can be read and set. 
+ ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", MemStr)), + ("i", Ptr("I", Num("I"))), + ] + +jitter = Machine("x86_32").jitter("python") +jitter.init_stack() +addr = 0x1000 +addr2 = 0x1100 +addr3 = 0x1200 +addr_str = 0x1300 +addr_str2 = 0x1400 +addr_str3 = 0x1500 +addr4 = 0x1600 +addr5 = 0x1700 +addr6 = 0x1800 +addr7 = 0x1900 +addr8 = 0x2000 +addr9 = 0x2100 +addr10 = 0x2200 +addr11 = 0x2300 +size = 0x2000 +# Initialize all mem with 0xaa +jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) + + +# MemStruct tests +## Creation +mstruct = MyStruct(jitter.vm, addr) +## Fields are read from the virtual memory +assert mstruct.num == 0xaaaaaaaa +assert mstruct.flags == 0xaa + +## Field assignment modifies virtual memory +mstruct.num = 3 +assert mstruct.num == 3 +memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] +assert memval == 3 + +## Memset sets the whole structure +mstruct.memset() +assert mstruct.num == 0 +assert mstruct.flags == 0 +assert mstruct.other == 0 +assert mstruct.s == 0 +assert mstruct.i == 0 +mstruct.memset('\x11') +assert mstruct.num == 0x11111111 +assert mstruct.flags == 0x11 +assert mstruct.other == 0x11111111 +assert mstruct.s == 0x11111111 +assert mstruct.i == 0x11111111 + + +# Ptr tests +## Setup for Ptr tests +other = OtherStruct(jitter.vm, addr2) +other.foo = 0x1234 +assert other.foo == 0x1234 + +## Basic usage +mstruct.other = other.get_addr() +assert mstruct.other == addr2 +assert mstruct.deref_other == other +assert mstruct.deref_other.foo == 0x1234 + +## Deref assignment +other2 = OtherStruct(jitter.vm, addr3) +other2.foo = 0xbeef +assert mstruct.deref_other != other2 +mstruct.deref_other = other2 +assert mstruct.deref_other == other2 +assert mstruct.deref_other.foo == 0xbeef +assert mstruct.other == addr2 # Addr did not change +assert other.foo == 0xbeef # Deref assignment copies by value +assert other2.foo == 0xbeef +assert other.get_addr() != other2.get_addr() 
# Not the same address +assert other == other2 # But same value + +## Same stuff for Ptr to MemField +mstruct.i = addr7 +mstruct.deref_i.value = 8 +assert mstruct.deref_i.value == 8 +assert mstruct.i == addr7 +memval = struct.unpack("I", jitter.vm.get_mem(addr7, 4))[0] +assert memval == 8 + + +# Str tests +## Basic tests +memstr = MemStr(jitter.vm, addr_str) +memstr.value = "" +assert memstr.value == "" +assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' +memstr.value = "lala" +assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' +jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') +assert memstr.value == 'MIAMs' + +## Ptr(MemStr) manipulations +mstruct.s = memstr.get_addr() +assert mstruct.s == addr_str +assert mstruct.deref_s == memstr +assert mstruct.deref_s.value == 'MIAMs' +mstruct.deref_s.value = "That's all folks!" +assert mstruct.deref_s.value == "That's all folks!" +assert memstr.value == "That's all folks!" + +## Other address, same value, same encoding +memstr2 = MemStr(jitter.vm, addr_str2) +memstr2.value = "That's all folks!" +assert memstr2.get_addr() != memstr.get_addr() +assert memstr2 == memstr + +## Same value, other encoding +memstr3 = MemStr(jitter.vm, addr_str3, "utf16") +memstr3.value = "That's all folks!" 
+assert memstr3.get_addr() != memstr.get_addr() +assert memstr3.get_size() != memstr.get_size() # Size is different +assert str(memstr3) != str(memstr) # Mem representation is different +assert memstr3 != memstr # Encoding is different, so they are not eq +assert memstr3.value == memstr.value # But the python value is the same + + +# MemArray tests +memarray = MemArray(jitter.vm, addr6, Num("I")) +# This also works: +_memarray = mem_array_type(Num("I"))(jitter.vm, addr6) +memarray[0] = 0x02 +assert memarray[0] == 0x02 +assert jitter.vm.get_mem(memarray.get_addr(), + Num("I").size()) == '\x02\x00\x00\x00' +memarray[2] = 0xbbbbbbbb +assert memarray[2] == 0xbbbbbbbb +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), + Num("I").size()) == '\xbb\xbb\xbb\xbb' +try: + s = str(memarray) + assert False, "Should raise" +except (NotImplementedError, ValueError): + pass +try: + s = len(memarray) + assert False, "Should raise" +except (NotImplementedError, ValueError): + pass + +## Slice assignment +memarray[2:4] = [3, 3] +assert memarray[2] == 3 +assert memarray[3] == 3 +assert memarray[2:4] == [3, 3] +try: + memarray[2:4] = [3, 3, 3] + assert False, "Should raise, mismatched sizes" +except (ValueError): + pass + +try: + memarray[1, 2] + assert False, "Should raise, mismatched sizes" +except (ValueError): + pass + + +# MemSizedArray tests +memsarray = MemSizedArray(jitter.vm, addr6, Num("I"), 10) +# This also works: +_memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm, addr6) +# And mem_sized_array_type generates statically sized types +assert _memsarray.sizeof() == len(memsarray) +memsarray.memset('\xcc') +assert memsarray[0] == 0xcccccccc +assert len(memsarray) == 10 * 4 +assert str(memsarray) == '\xcc' * (4 * 10) +for val in memsarray: + assert val == 0xcccccccc +assert list(memsarray) == [0xcccccccc] * 10 +memsarray[0] = 2 +assert memsarray[0] == 2 +assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) + + +# Atypical fields (Struct and 
Array) +class MyStruct2(MemStruct): + fields = [ + ("s1", Struct("=BI")), + ("s2", Array(Num("B"), 10)), + ] + +ms2 = MyStruct2(jitter.vm, addr5) +ms2.memset('\xaa') +assert len(ms2) == 15 + +## Struct +assert len(ms2.s1) == 2 +assert ms2.s1[0] == 0xaa +assert ms2.s1[1] == 0xaaaaaaaa + +## Array +### Basic checks +assert len(ms2.s2) == 10 +for val in ms2.s2: + assert val == 0xaa +assert ms2.s2[0] == 0xaa +assert ms2.s2[9] == 0xaa + +### Subscript assignment +ms2.s2[3] = 2 +assert ms2.s2[3] == 2 + +### Field assignment (list) +ms2.s2 = [1] * 10 +for val in ms2.s2: + assert val == 1 + +### Field assignment (MemSizedArray) +jitter.vm.set_mem(addr4, '\x02'*10) +array2 = MemSizedArray(jitter.vm, addr4, Num("B"), 10) +for val in array2: + assert val == 2 +ms2.s2 = array2 +for val in ms2.s2: + assert val == 2 + + +# Inline tests +class InStruct(MemStruct): + fields = [ + ("foo", Num("B")), + ("bar", Num("B")), + ] + +class ContStruct(MemStruct): + fields = [ + ("one", Num("B")), + ("instruct", Inline(InStruct)), + ("last", Num("B")), + ] + +cont = ContStruct(jitter.vm, addr4) +cont.memset() +assert len(cont) == 4 +assert len(cont.instruct) == 2 +assert cont.one == 0 +assert cont.last == 0 +assert cont.instruct.foo == 0 +assert cont.instruct.bar == 0 +cont.memset('\x11') +assert cont.one == 0x11 +assert cont.last == 0x11 +assert cont.instruct.foo == 0x11 +assert cont.instruct.bar == 0x11 + +cont.one = 0x01 +cont.instruct.foo = 0x02 +cont.instruct.bar = 0x03 +cont.last = 0x04 +assert cont.one == 0x01 +assert cont.instruct.foo == 0x02 +assert cont.instruct.bar == 0x03 +assert cont.last == 0x04 +assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' + +# Quick mem(MemField) test: +assert mem(Num("f"))(jitter.vm, addr) == mem(Num("f"))(jitter.vm, addr) + + +# Union test +class UniStruct(MemStruct): + fields = [ + ("one", Num("B")), + ("union", Union([ + ("instruct", Inline(InStruct)), + ("i", Num(">I")), + ])), + ("last", Num("B")), + ] + +uni = 
UniStruct(jitter.vm, addr8) +jitter.vm.set_mem(addr8, ''.join(chr(x) for x in xrange(len(uni)))) +assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 +assert uni.one == 0x00 +assert uni.instruct.foo == 0x01 +assert uni.instruct.bar == 0x02 +assert uni.i == 0x01020304 +assert uni.last == 0x05 +uni.instruct.foo = 0x02 +assert uni.i == 0x02020304 +uni.i = 0x11223344 +assert uni.instruct.foo == 0x11 +assert uni.instruct.bar == 0x22 + + +# BitField test +class BitStruct(MemStruct): + fields = [ + ("flags", BitField(Num("H"), [ + ("f1_1", 1), + ("f2_5", 5), + ("f3_8", 8), + ("f4_1", 1), + ])), + ] + +bit = BitStruct(jitter.vm, addr9) +bit.memset() +assert bit.flags == 0 +assert bit.f1_1 == 0 +assert bit.f2_5 == 0 +assert bit.f3_8 == 0 +assert bit.f4_1 == 0 +bit.f1_1 = 1 +bit.f2_5 = 0b10101 +bit.f3_8 = 0b10000001 +assert bit.flags == 0b0010000001101011 +assert bit.f1_1 == 1 +assert bit.f2_5 == 0b10101 +assert bit.f3_8 == 0b10000001 +assert bit.f4_1 == 0 +bit.flags = 0b1101010101011100 +assert bit.f1_1 == 0 +assert bit.f2_5 == 0b01110 +assert bit.f3_8 == 0b01010101 +assert bit.f4_1 == 1 + + +# Unhealthy ideas +class UnhealthyIdeas(MemStruct): + fields = [ + ("f1", Ptr("I", MemArray, Struct("=Bf"))), + ("f2", Array(Ptr("I", MemStr), 10)), + ("f3", Ptr("I", MemSelf)), + ("f4", Array(Ptr("I", MemSelf), 2)), + ("f5", Ptr("I", Ptr("I", MemSelf))), + ] + +# Other way to handle self dependency and circular dependencies +# NOTE: in this case, MemSelf would have been fine +UnhealthyIdeas.fields.append( + ("f6", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) +# Regen all fields +UnhealthyIdeas.gen_fields() + +ideas = UnhealthyIdeas(jitter.vm, addr7) +ideas.memset() +ideas.f3 = ideas.get_addr() +assert ideas == ideas.deref_f3 + +ideas.f4[0] = ideas.get_addr() +assert ideas.f4.deref_get(0) == ideas +ideas.f4[1] = addr6 +ideas.f4.deref_set(1, ideas) +assert ideas.f4[1] != ideas.get_addr() +assert ideas.f4.deref_get(1) == ideas + +ideas.f5 = addr2 +ideas.deref_f5.value = 
ideas.get_addr() +assert ideas.deref_f5.value == ideas.get_addr() +assert ideas.deref_f5.deref_value == ideas + +ideas.deref_f5.value = addr3 +ideas.deref_f5.deref_value = ideas +assert ideas.deref_f5.value != ideas.get_addr() +assert ideas.deref_f5.deref_value == ideas + +ideas.f6 = addr4 +ideas.deref_f6.value = addr5 +ideas.deref_f6.deref_value.value = ideas.get_addr() +assert ideas.deref_f6.deref_value.deref_value == ideas + +# Cast tests +# MemStruct cast +MemInt = mem(Num("I")) +MemShort = mem(Num("H")) +dword = MemInt(jitter.vm, addr10) +dword.value = 0x12345678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).value == 0x5678 + +# Field cast +ms2.s2[0] = 0x34 +ms2.s2[1] = 0x12 +assert ms2.cast_field("s2", MemShort).value == 0x1234 + +# Other method +assert MemShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 + +# Manual cast inside an Array +ms2.s2[4] = 0xcd +ms2.s2[5] = 0xab +assert MemShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd + +# void* style cast +MemPtrVoid = mem(Ptr("I", MemVoid)) +MemPtrMyStruct = mem(Ptr("I", MyStruct)) +p = MemPtrVoid(jitter.vm, addr11) +p.value = mstruct.get_addr() +assert p.deref_value.cast(MyStruct) == mstruct +assert p.cast(MemPtrMyStruct).deref_value == mstruct + +print "Some struct reprs:\n" +print repr(mstruct), '\n' +print repr(ms2), '\n' +print repr(cont), '\n' +print repr(uni), '\n' +print repr(bit), '\n' +print repr(bit), '\n' +print repr(ideas), '\n' +print repr(mem(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' +print repr(mem(Num("f"))(jitter.vm, addr)), '\n' +print repr(memarray) +print repr(memsarray) +print repr(memstr) +print repr(memstr3) + +print "Ok" # That's all folks! 
diff --git a/test/test_all.py b/test/test_all.py index bc019104..71f036a2 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -238,6 +238,8 @@ testset += RegressionTest(["depgraph.py"], base_dir="analysis", (14, 1), (15, 1))) for fname in fnames]) +testset += RegressionTest(["mem.py"], base_dir="analysis") + # Examples class Example(Test): """Examples specificities: -- cgit 1.4.1 From a03c4cb22a2bdaefe19ab2908dc55894211e5070 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 8 Nov 2015 02:06:32 +0100 Subject: MemStruct: auto-allocation feature MemStruct can be automatically allocated if a None addr is passed to the constructor and mem.allocator has been set to an allocation function. miasm2.os_dep.common.heap API has been extended to directly support a VmMngr as an argument. NOTE: heap.alloc and heap.vm_alloc could be merged, but allowing the first argument to be either a jitter or a vm is misleading, and changing the old API would have broken some code. --- miasm2/analysis/mem.py | 45 +++++++++------ miasm2/os_dep/common.py | 8 ++- test/analysis/mem.py | 142 +++++++++++++++++++++++++----------------------- 3 files changed, 111 insertions(+), 84 deletions(-) (limited to 'test') diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index a967e58f..057f7a37 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -7,7 +7,8 @@ console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARN) -# TODO: alloc +# allocator is a function(vm, size) -> allocated_address +allocator = None # Helpers @@ -150,22 +151,23 @@ class Ptr(Num): super(Ptr, self).set_self_type(self_type) def _fix_dst_type(self): - global classes - if self._dst_type == MemSelf: if self.get_self_type() is not None: self._dst_type = self.get_self_type() else: raise ValueError("Unsupported usecase for MemSelf, sorry") - def deref_get(self, vm, addr): + @property + def dst_type(self): self._fix_dst_type() - 
return self._dst_type(vm, addr, *self._type_args, **self._type_kwargs) + return self._dst_type + + def deref_get(self, vm, addr): + return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): - self._fix_dst_type() # Sanity check - if self._dst_type != val.__class__: + if self.dst_type != val.__class__: log.warning("Original type was %s, overriden by value of type %s", self._dst_type.__name__, val.__class__.__name__) @@ -381,10 +383,17 @@ class MemStruct(object): _size = None - def __init__(self, vm, addr, *args, **kwargs): + def __init__(self, vm, addr=None, *args, **kwargs): + global allocator super(MemStruct, self).__init__(*args, **kwargs) self._vm = vm - self._addr = addr + if addr is None: + if allocator is None: + raise ValueError("Cannot provide None address to MemStruct() if" + "%s.allocator is not set." % __name__) + self._addr = allocator(vm, self.get_size()) + else: + self._addr = addr def get_addr(self, field_name=None): if field_name is not None: @@ -402,6 +411,9 @@ class MemStruct(object): def get_size(self): return self.sizeof() + def get_field_type(self, name): + return self._attrs[name]['field'] + def get_attr(self, attr): if attr not in self._attrs: raise AttributeError("'%s' object has no attribute '%s'" @@ -438,7 +450,6 @@ class MemStruct(object): raise ValueError("byte must be a 1-lengthed str") self._vm.set_mem(self.get_addr(), byte * self.get_size()) - # TODO: examples def cast(self, other_type, *type_args, **type_kwargs): return self.cast_field(None, other_type, *type_args, **type_kwargs) @@ -544,14 +555,14 @@ class MemStr(MemStruct): class MemArray(MemStruct): _field_type = None - def __init__(self, vm, addr, field_type=None): - super(MemArray, self).__init__(vm, addr) - if self._field_type is None and field_type is not None: + def __init__(self, vm, addr=None, field_type=None): + if self._field_type is None: self._field_type = field_type if self._field_type is None: raise NotImplementedError( 
"Provide field_type to instanciate this class, " "or generate a subclass with mem_array_type.") + super(MemArray, self).__init__(vm, addr) @property def field_type(self): @@ -632,10 +643,12 @@ def mem_array_type(field_type): class MemSizedArray(MemArray): _array_len = None - def __init__(self, vm, addr, field_type=None, length=None): - super(MemSizedArray, self).__init__(vm, addr, field_type) - if self._array_len is None and length is not None: + def __init__(self, vm, addr=None, field_type=None, length=None): + # Set the length before anything else to allow get_size() to work for + # allocation + if self._array_len is None: self._array_len = length + super(MemSizedArray, self).__init__(vm, addr, field_type) if self._array_len is None or self._field_type is None: raise NotImplementedError( "Provide field_type and length to instanciate this class, " diff --git a/miasm2/os_dep/common.py b/miasm2/os_dep/common.py index 7f8caed1..b7eb656a 100644 --- a/miasm2/os_dep/common.py +++ b/miasm2/os_dep/common.py @@ -60,9 +60,15 @@ class heap(object): @jitter: a jitter instance @size: the size to allocate """ + return self.vm_alloc(jitter.vm, size) + def vm_alloc(self, vm, size): + """ + @vm: a VmMngr instance + @size: the size to allocate + """ addr = self.next_addr(size) - jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\x00" * size) + vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\x00" * size) return addr diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 4b306e67..d9fe889c 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -2,10 +2,16 @@ # miasm2.analysis.mem tests +import struct from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import * +import miasm2.analysis.mem as mem_module +from miasm2.analysis.mem import MemStruct, Num, Ptr, MemStr, MemArray,\ + MemSizedArray, Array, mem_array_type,\ + mem_sized_array_type, Struct, Inline, mem,\ + Union, BitField, MemSelf, MemVoid from miasm2.jitter.csts import PAGE_READ, 
PAGE_WRITE +from miasm2.os_dep.common import heap # Two structures with some fields class OtherStruct(MemStruct): @@ -29,26 +35,17 @@ class MyStruct(MemStruct): jitter = Machine("x86_32").jitter("python") jitter.init_stack() addr = 0x1000 -addr2 = 0x1100 -addr3 = 0x1200 -addr_str = 0x1300 -addr_str2 = 0x1400 -addr_str3 = 0x1500 -addr4 = 0x1600 -addr5 = 0x1700 -addr6 = 0x1800 -addr7 = 0x1900 -addr8 = 0x2000 -addr9 = 0x2100 -addr10 = 0x2200 -addr11 = 0x2300 -size = 0x2000 +size = 0x1000 +addr_str = 0x1100 +addr_str2 = 0x1200 +addr_str3 = 0x1300 # Initialize all mem with 0xaa jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) # MemStruct tests ## Creation +# Use manual allocation with explicit addr for the first example mstruct = MyStruct(jitter.vm, addr) ## Fields are read from the virtual memory assert mstruct.num == 0xaaaaaaaa @@ -75,37 +72,45 @@ assert mstruct.s == 0x11111111 assert mstruct.i == 0x11111111 +# From now, just use heap.vm_alloc +my_heap = heap() +mem_module.allocator = my_heap.vm_alloc + + # Ptr tests ## Setup for Ptr tests -other = OtherStruct(jitter.vm, addr2) +# the addr field can now be omited since allocator is set +other = OtherStruct(jitter.vm) other.foo = 0x1234 assert other.foo == 0x1234 ## Basic usage mstruct.other = other.get_addr() -assert mstruct.other == addr2 +assert mstruct.other == other.get_addr() assert mstruct.deref_other == other assert mstruct.deref_other.foo == 0x1234 ## Deref assignment -other2 = OtherStruct(jitter.vm, addr3) +other2 = OtherStruct(jitter.vm) other2.foo = 0xbeef assert mstruct.deref_other != other2 mstruct.deref_other = other2 assert mstruct.deref_other == other2 assert mstruct.deref_other.foo == 0xbeef -assert mstruct.other == addr2 # Addr did not change +assert mstruct.other == other.get_addr() # Addr did not change assert other.foo == 0xbeef # Deref assignment copies by value assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address assert other == 
other2 # But same value ## Same stuff for Ptr to MemField -mstruct.i = addr7 +alloc_addr = my_heap.vm_alloc(jitter.vm, + mstruct.get_field_type("i").dst_type.sizeof()) +mstruct.i = alloc_addr mstruct.deref_i.value = 8 assert mstruct.deref_i.value == 8 -assert mstruct.i == addr7 -memval = struct.unpack("I", jitter.vm.get_mem(addr7, 4))[0] +assert mstruct.i == alloc_addr +memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] assert memval == 8 @@ -146,9 +151,11 @@ assert memstr3.value == memstr.value # But the python value is the same # MemArray tests -memarray = MemArray(jitter.vm, addr6, Num("I")) +# Allocate buffer manually, since memarray is unsized +alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) +memarray = MemArray(jitter.vm, alloc_addr, Num("I")) # This also works: -_memarray = mem_array_type(Num("I"))(jitter.vm, addr6) +_memarray = mem_array_type(Num("I"))(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -187,9 +194,9 @@ except (ValueError): # MemSizedArray tests -memsarray = MemSizedArray(jitter.vm, addr6, Num("I"), 10) +memsarray = MemSizedArray(jitter.vm, None, Num("I"), 10) # This also works: -_memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm, addr6) +_memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm) # And mem_sized_array_type generates statically sized types assert _memsarray.sizeof() == len(memsarray) memsarray.memset('\xcc') @@ -211,7 +218,7 @@ class MyStruct2(MemStruct): ("s2", Array(Num("B"), 10)), ] -ms2 = MyStruct2(jitter.vm, addr5) +ms2 = MyStruct2(jitter.vm) ms2.memset('\xaa') assert len(ms2) == 15 @@ -238,8 +245,8 @@ for val in ms2.s2: assert val == 1 ### Field assignment (MemSizedArray) -jitter.vm.set_mem(addr4, '\x02'*10) -array2 = MemSizedArray(jitter.vm, addr4, Num("B"), 10) +array2 = MemSizedArray(jitter.vm, None, Num("B"), 10) +jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 ms2.s2 = array2 @@ -261,7 +268,7 
@@ class ContStruct(MemStruct): ("last", Num("B")), ] -cont = ContStruct(jitter.vm, addr4) +cont = ContStruct(jitter.vm) cont.memset() assert len(cont) == 4 assert len(cont.instruct) == 2 @@ -300,8 +307,8 @@ class UniStruct(MemStruct): ("last", Num("B")), ] -uni = UniStruct(jitter.vm, addr8) -jitter.vm.set_mem(addr8, ''.join(chr(x) for x in xrange(len(uni)))) +uni = UniStruct(jitter.vm) +jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 assert uni.one == 0x00 assert uni.instruct.foo == 0x01 @@ -326,7 +333,7 @@ class BitStruct(MemStruct): ])), ] -bit = BitStruct(jitter.vm, addr9) +bit = BitStruct(jitter.vm) bit.memset() assert bit.flags == 0 assert bit.f1_1 == 0 @@ -351,52 +358,54 @@ assert bit.f4_1 == 1 # Unhealthy ideas class UnhealthyIdeas(MemStruct): fields = [ - ("f1", Ptr("I", MemArray, Struct("=Bf"))), - ("f2", Array(Ptr("I", MemStr), 10)), - ("f3", Ptr("I", MemSelf)), - ("f4", Array(Ptr("I", MemSelf), 2)), - ("f5", Ptr("I", Ptr("I", MemSelf))), + ("pastruct", Ptr("I", MemArray, Struct("=Bf"))), + ("apstr", Array(Ptr("I", MemStr), 10)), + ("pself", Ptr("I", MemSelf)), + ("apself", Array(Ptr("I", MemSelf), 2)), + ("ppself", Ptr("I", Ptr("I", MemSelf))), ] # Other way to handle self dependency and circular dependencies # NOTE: in this case, MemSelf would have been fine UnhealthyIdeas.fields.append( - ("f6", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) + ("pppself", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) # Regen all fields UnhealthyIdeas.gen_fields() -ideas = UnhealthyIdeas(jitter.vm, addr7) +p_size = Ptr("I", MemVoid).size() + +ideas = UnhealthyIdeas(jitter.vm) ideas.memset() -ideas.f3 = ideas.get_addr() -assert ideas == ideas.deref_f3 - -ideas.f4[0] = ideas.get_addr() -assert ideas.f4.deref_get(0) == ideas -ideas.f4[1] = addr6 -ideas.f4.deref_set(1, ideas) -assert ideas.f4[1] != ideas.get_addr() -assert ideas.f4.deref_get(1) == ideas - -ideas.f5 = addr2 
-ideas.deref_f5.value = ideas.get_addr() -assert ideas.deref_f5.value == ideas.get_addr() -assert ideas.deref_f5.deref_value == ideas - -ideas.deref_f5.value = addr3 -ideas.deref_f5.deref_value = ideas -assert ideas.deref_f5.value != ideas.get_addr() -assert ideas.deref_f5.deref_value == ideas - -ideas.f6 = addr4 -ideas.deref_f6.value = addr5 -ideas.deref_f6.deref_value.value = ideas.get_addr() -assert ideas.deref_f6.deref_value.deref_value == ideas +ideas.pself = ideas.get_addr() +assert ideas == ideas.deref_pself + +ideas.apself[0] = ideas.get_addr() +assert ideas.apself.deref_get(0) == ideas +ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.apself.deref_set(1, ideas) +assert ideas.apself[1] != ideas.get_addr() +assert ideas.apself.deref_get(1) == ideas + +ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.deref_ppself.value = ideas.get_addr() +assert ideas.deref_ppself.value == ideas.get_addr() +assert ideas.deref_ppself.deref_value == ideas + +ideas.deref_ppself.value = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.deref_ppself.deref_value = ideas +assert ideas.deref_ppself.value != ideas.get_addr() +assert ideas.deref_ppself.deref_value == ideas + +ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.deref_pppself.value = my_heap.vm_alloc(jitter.vm, p_size) +ideas.deref_pppself.deref_value.value = ideas.get_addr() +assert ideas.deref_pppself.deref_value.deref_value == ideas # Cast tests # MemStruct cast MemInt = mem(Num("I")) MemShort = mem(Num("H")) -dword = MemInt(jitter.vm, addr10) +dword = MemInt(jitter.vm) dword.value = 0x12345678 assert isinstance(dword.cast(MemShort), MemShort) assert dword.cast(MemShort).value == 0x5678 @@ -417,7 +426,7 @@ assert MemShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd # void* style cast MemPtrVoid = mem(Ptr("I", MemVoid)) MemPtrMyStruct = mem(Ptr("I", MyStruct)) -p = MemPtrVoid(jitter.vm, addr11) +p = MemPtrVoid(jitter.vm) p.value = mstruct.get_addr() assert 
p.deref_value.cast(MyStruct) == mstruct assert p.cast(MemPtrMyStruct).deref_value == mstruct @@ -428,7 +437,6 @@ print repr(ms2), '\n' print repr(cont), '\n' print repr(uni), '\n' print repr(bit), '\n' -print repr(bit), '\n' print repr(ideas), '\n' print repr(mem(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' print repr(mem(Num("f"))(jitter.vm, addr)), '\n' -- cgit 1.4.1 From e53ac5b2a65f79d6342b1820c68efc126c8e4986 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 8 Nov 2015 21:27:40 +0100 Subject: MemStruct: minor fixes + toy example script --- example/jitter/memstruct.py | 229 ++++++++++++++++++++++++++++++++++++++++++++ miasm2/analysis/mem.py | 5 + test/analysis/mem.py | 7 +- 3 files changed, 237 insertions(+), 4 deletions(-) create mode 100644 example/jitter/memstruct.py (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py new file mode 100644 index 00000000..f2e9f8dc --- /dev/null +++ b/example/jitter/memstruct.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python +"""This script is just a short example of common usages for miasm2.analysis.mem. +For a more complete view of what is possible, tests/analysis/mem.py covers +most of the module possibilities, and the module doc gives useful information +as well. +""" + +from miasm2.analysis.machine import Machine +from miasm2.analysis.mem import MemStruct, MemSelf, MemVoid, MemStr,\ + Ptr, Num, Array, set_allocator +from miasm2.os_dep.common import heap + +# Instanciate a heap +my_heap = heap() +# And set it as the default memory allocator, to avoid manual allocation and +# explicit address passing to the MemStruct constructor +set_allocator(my_heap.vm_alloc) + +# Let's reimplement a simple C generic linked list mapped on a VmMngr! 
+ +# All the structures and methods will use the python objects but all the data +# is in fact stored in the VmMngr + +class ListNode(MemStruct): + fields = [ + # The " allocated_address allocator = None +def set_allocator(alloc_func): + global allocator + allocator = alloc_func + + # Helpers def indent(s, size=4): diff --git a/test/analysis/mem.py b/test/analysis/mem.py index d9fe889c..9eba8fca 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -5,11 +5,10 @@ import struct from miasm2.analysis.machine import Machine -import miasm2.analysis.mem as mem_module from miasm2.analysis.mem import MemStruct, Num, Ptr, MemStr, MemArray,\ MemSizedArray, Array, mem_array_type,\ mem_sized_array_type, Struct, Inline, mem,\ - Union, BitField, MemSelf, MemVoid + Union, BitField, MemSelf, MemVoid, set_allocator from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap @@ -74,7 +73,7 @@ assert mstruct.i == 0x11111111 # From now, just use heap.vm_alloc my_heap = heap() -mem_module.allocator = my_heap.vm_alloc +set_allocator(my_heap.vm_alloc) # Ptr tests @@ -445,4 +444,4 @@ print repr(memsarray) print repr(memstr) print repr(memstr3) -print "Ok" # That's all folks! +print "\nOk" # That's all folks! -- cgit 1.4.1 From 496b4f181501e85bee38c078b8e63c0ab6d40600 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Wed, 18 Nov 2015 23:54:34 +0100 Subject: MemStruct: module and MemFields documentation + minor fixes/refactors --- miasm2/analysis/mem.py | 290 +++++++++++++++++++++++++++++++++++++++++++++---- test/analysis/mem.py | 4 +- 2 files changed, 269 insertions(+), 25 deletions(-) (limited to 'test') diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 4ba3f842..be14e013 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -1,3 +1,81 @@ +"""This module provides classes to manipulate C structures backed by a VmMngr +object (a miasm VM virtual memory). 
+ +The main idea is to declare the fields of the structure in the class: + + # FIXME: "I" => "u32" + class MyStruct(MemStruct): + fields = [ + # Integer field: just struct.pack fields with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields are Num, but they can also be dereferenced + # (self.deref_). Deref can be read and set. + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", MemStr)), + ("i", Ptr("I", Num("I"))), + ] + +And access the fields: + + mstruct = MyStruct(jitter.vm, addr) + mstruct.num = 3 + assert mstruct.num == 3 + mstruct.other = addr2 + mstruct.deref_other = OtherStruct(jitter.vm, addr) + +The `addr` argument can be omited if an allocator is set, in which case the +structure will be automatically allocated in memory: + + my_heap = miasm2.os_dep.common.heap() + set_allocator(my_heap) + +Note that some structures (e.g. MemStr or MemArray) do not have a static size +and cannot be allocated automatically. + + +As you saw previously, to use this module, you just have to inherit from +MemStruct and define a list of (, ). 
Availabe +MemField classes are: + + - Num: for number (float or int) handling + - Struct: abstraction over a simple struct pack/unpack + - Ptr: a pointer to another MemStruct instance + - Inline: include another MemStruct as a field (equivalent to having a + struct field into another struct in C) + - Array: a fixed size array of MemFields (points) + - Union: similar to `union` in C, list of MemFields at the same offset in a + structure; the union has the size of the biggest MemField + - BitField: similar to C bitfields, a list of + [( integer_address + """ global allocator allocator = alloc_func @@ -18,11 +103,18 @@ def set_allocator(alloc_func): # Helpers def indent(s, size=4): + """Indents a string with @size spaces""" return ' '*size + ('\n' + ' '*size).join(s.split('\n')) - + # FIXME: copied from miasm2.os_dep.common and fixed def get_str_ansi(vm, addr, max_char=None): + """Gets a null terminated ANSI encoded string from a VmMngr. + + Args: + vm: VmMngr instance + max_char: max number of characters to get in memory + """ l = 0 tmp = addr while ((max_char is None or l < max_char) and @@ -34,6 +126,16 @@ def get_str_ansi(vm, addr, max_char=None): # TODO: get_raw_str_utf16 for length calculus def get_str_utf16(vm, addr, max_char=None): + """Gets a (double) null terminated utf16 little endian encoded string from + a VmMngr. This encoding is mainly used in Windows. + + FIXME: the implementation do not work with codepoints that are encoded on + more than 2 bytes in utf16. + + Args: + vm: VmMngr instance + max_char: max number of bytes to get in memory + """ l = 0 tmp = addr # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? @@ -46,16 +148,26 @@ def get_str_utf16(vm, addr, max_char=None): def set_str_ansi(vm, addr, s): + """Encodes a string to null terminated ascii/ansi and sets it in a VmMngr + memory. 
+ + Args: + vm: VmMngr instance + addr: start address to serialize the string to + s: the str to serialize + """ vm.set_mem(addr, s + "\x00") def set_str_utf16(vm, addr, s): + """Same as set_str_ansi with (double) null terminated utf16 encoding.""" s = (s + '\x00').encode('utf-16le') vm.set_mem(addr, s) # MemField to MemStruct helper +# TODO: cache generated types def mem(field): """Generates a MemStruct subclass from a field. The field's value can be accessed through self.value or self.deref_value if field is a Ptr. @@ -69,33 +181,51 @@ def mem(field): # MemField classes class MemField(object): - """Base class to provide methods to set and get fields from virtual mem.""" + """Base class to provide methods to set and get fields from virtual mem. + + Subclasses can either override _pack and _unpack, or get and set if data + serialization requires more work (see Inline implementation for an example). + """ _self_type = None def _pack(self, val): - """Returns a packed str""" + """Serializes the python value @val to a raw str""" raise NotImplementedError() - def _unpack(self, packed_str): - """Returns an object.""" + def _unpack(self, raw_str): + """Deserializes a raw str to an object representing the python value + of this field. + """ raise NotImplementedError() def set(self, vm, addr, val): + """Set a VmMngr memory from a value. + + Args: + vm: VmMngr instance + addr: the start adress in memory to set + val: the python value to serialize in @vm at @addr + """ raw = self._pack(val) vm.set_mem(addr, raw) def get(self, vm, addr): + """Get the python value of a field from a VmMngr memory at @addr.""" raw = vm.get_mem(addr, self.size()) return self._unpack(raw) - def get_self_type(self): + def _get_self_type(self): return self._self_type - def set_self_type(self, self_type): + def _set_self_type(self, self_type): + """If this field refers to MemSelf, replace it with @self_type (a + MemStruct subclass) when using it. Generally not used outside the lib. 
+ """ self._self_type = self_type def size(self): + """Returns the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): @@ -103,6 +233,10 @@ class MemField(object): class Struct(MemField): + """Dumb struct.pack/unpack field. Mainly used to factorize code. + + Value is a tuple corresponding to the struct @fmt passed to the constructor. + """ def __init__(self, fmt): self._fmt = fmt @@ -110,8 +244,8 @@ class Struct(MemField): def _pack(self, fields): return struct.pack(self._fmt, *fields) - def _unpack(self, packed_str): - return struct.unpack(self._fmt, packed_str) + def _unpack(self, raw_str): + return struct.unpack(self._fmt, raw_str) def size(self): return struct.calcsize(self._fmt) @@ -121,12 +255,17 @@ class Struct(MemField): class Num(Struct): + """Represents a number (integer or float). The number is encoded with + a struct-style format which must represent only one value. + + TODO: use u32, i16, etc. for format. + """ def _pack(self, number): return super(Num, self)._pack([number]) - def _unpack(self, packed_str): - upck = super(Num, self)._unpack(packed_str) + def _unpack(self, raw_str): + upck = super(Num, self)._unpack(raw_str) if len(upck) > 1: raise ValueError("Num format string unpacks to multiple values, " "should be 1") @@ -134,6 +273,10 @@ class Num(Struct): class Ptr(Num): + """Special case of number of which value indicates the address of a + MemStruct. Provides deref_ as well as when used, to set and + get the pointed MemStruct. 
+ """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): if not isinstance(dst_type, MemField) and\ @@ -146,31 +289,35 @@ class Ptr(Num): super(Ptr, self).__init__(fmt) if isinstance(dst_type, MemField): # Patch the field to propagate the MemSelf replacement - dst_type.get_self_type = lambda: self.get_self_type() + dst_type._get_self_type = lambda: self._get_self_type() dst_type = mem(dst_type) self._dst_type = dst_type self._type_args = type_args self._type_kwargs = type_kwargs - def set_self_type(self, self_type): - super(Ptr, self).set_self_type(self_type) - def _fix_dst_type(self): if self._dst_type == MemSelf: - if self.get_self_type() is not None: - self._dst_type = self.get_self_type() + if self._get_self_type() is not None: + self._dst_type = self._get_self_type() else: raise ValueError("Unsupported usecase for MemSelf, sorry") @property def dst_type(self): + """Returns the type (MemStruct subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type def deref_get(self, vm, addr): + """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. + Equivalent to a pointer dereference rvalue in C. + """ return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): + """Serializes the @val MemStruct subclass instance in @vm (VmMngr) at + @addr. Equivalent to a pointer dereference assignment in C. + """ # Sanity check if self.dst_type != val.__class__: log.warning("Original type was %s, overriden by value of type %s", @@ -184,6 +331,25 @@ class Ptr(Num): class Inline(MemField): + """Field used to inline a MemStruct in another MemStruct. Equivalent to + having a struct field in a C struct. 
+ + Concretely: + + class MyStructClass(MemStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] + + class Example(MemStruct): + fields = [("mystruct", Inline(MyStructClass))] + + ex = Example(vm, addr) + ex.mystruct.f2 = 3 # inlined structure field access + ex.mystruct = MyStructClass(vm, addr2) # struct copy + + It can be seen like a bridge to use a MemStruct as a MemField + + TODO: make the Inline implicit when setting a field to be a MemStruct + """ def __init__(self, inlined_type, *type_args, **type_kwargs): if not issubclass(inlined_type, MemStruct): @@ -207,21 +373,38 @@ class Inline(MemField): class Array(MemField): + """A fixed size array (contiguous sequence) of a MemField subclass + elements. Similar to something like the char[10] type in C. + + Getting an array field actually returns a MemSizedArray. Setting it is + possible with either a list or a MemSizedArray instance. Examples of syntax: + + class Example(MemStruct): + fields = [("array", Array(Num("B"), 4))] + + mystruct = Example(vm, addr) + mystruct.array[3] = 27 + mystruct.array = [1, 4, 8, 9] + mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4) + """ def __init__(self, field_type, length): self._field_type = field_type self._array_len = length - def set_self_type(self, self_type): - super(Array, self).set_self_type(self_type) - self._field_type.set_self_type(self_type) + def _set_self_type(self, self_type): + super(Array, self)._set_self_type(self_type) + self._field_type._set_self_type(self_type) def set(self, vm, addr, val): + # MemSizedArray assignment if isinstance(val, MemSizedArray): if val.array_len != self._array_len or len(val) != self.size(): raise ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) + + # list assignment elif isinstance(val, list): if len(val) != self._array_len: raise ValueError("Size mismatch in MemSizedArray assignment ") @@ -229,6 +412,7 @@ class Array(MemField): for elt in val: self._field_type.set(vm, addr + 
offset, elt) offset += self._field_type.size() + else: raise NotImplementedError( "Assignment only implemented for list and MemSizedArray") @@ -244,8 +428,27 @@ class Array(MemField): class Union(MemField): + """Allows to put multiple fields at the same offset in a MemStruct, similar + to unions in C. The Union will have the size of the largest of its fields. + + Example: + + class Example(MemStruct): + fields = [("uni", Union([ + ("f1", Num(", )] and a + @backing_num. The @backing_num is a Num instance that determines the total + size of the bitfield and the way the bits are serialized/deserialized (big + endian int, little endian short...). Can be seen (and implemented) as a + Union of Bits fields. + + Creates fields that allow to access the bitfield fields easily. Example: + + class Example(MemStruct): + fields = [("bf", BitField(Num("B"), [ + ("f1", 2), + ("f2", 4), + ("f3", 1) + ]) + )] + + ex = Example(vm, addr) + ex.memset() + ex.f2 = 2 + ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated + assert ex.f1 == 3 + assert ex.f2 == 2 + assert ex.f3 == 0 # previously memset() + assert ex.bf == 3 + 2 << 2 + """ + def __init__(self, backing_num, bit_list): - """bit_list: [(name, n_bits)]""" + """@backing num: Num intance, @bit_list: [(name, n_bits)]""" self._num = backing_num fields = [] offset = 0 @@ -465,7 +709,7 @@ class MemStruct(object): offset = 0 for name, field in cls.fields: # For reflexion - field.set_self_type(cls) + field._set_self_type(cls) cls.gen_attr(name, field, offset) offset += field.size() cls._size = offset diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 9eba8fca..df1df9bc 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -20,10 +20,10 @@ class OtherStruct(MemStruct): class MyStruct(MemStruct): fields = [ - # Integer field: just struct.pack fields with one value + # Number field: just struct.pack fields with one value ("num", Num("I")), ("flags", Num("B")), - # Ptr fields are Int, but they can also 
be dereferenced + # Ptr fields are Num, but they can also be dereferenced # (self.deref_). Deref can be read and set. ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String -- cgit 1.4.1 From 9f824878c99d9761f25178b00ca009691697bbe9 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Tue, 24 Nov 2015 11:14:48 +0100 Subject: MemStruct: MemField equality --- miasm2/analysis/mem.py | 75 +++++++++++++++++++++++++++++++++++++------------- test/analysis/mem.py | 38 ++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 20 deletions(-) (limited to 'test') diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 7dda9041..ce389f89 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -231,6 +231,9 @@ class MemField(object): def __len__(self): return self.size() + def __neq__(self, other): + return not self == other + class Struct(MemField): """Dumb struct.pack/unpack field. Mainly used to factorize code. @@ -253,6 +256,9 @@ class Struct(MemField): def __repr__(self): return "%s(%s)" % (self.__class__.__name__, self._fmt) + def __eq__(self, other): + return self.__class__ == other.__class__ and self._fmt == other._fmt + class Num(Struct): """Represents a number (integer or float). The number is encoded with @@ -339,6 +345,12 @@ class Ptr(Num): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._dst_type) + def __eq__(self, other): + return super(Ptr, self).__eq__(other) and \ + self.dst_type == other.dst_type and \ + self._type_args == other._type_args and \ + self._type_kwargs == other._type_kwargs + class Inline(MemField): """Field used to inline a MemStruct in another MemStruct. 
Equivalent to @@ -381,6 +393,12 @@ class Inline(MemField): def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._il_type) + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._il_type == other._il_type and \ + self._type_args == other._type_args and \ + self._type_kwargs == other._type_kwargs + class Array(MemField): """A fixed size array (contiguous sequence) of a MemField subclass @@ -398,43 +416,48 @@ class Array(MemField): mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4) """ - def __init__(self, field_type, length): - self._field_type = field_type - self._array_len = length + def __init__(self, field_type, array_len): + self.field_type = field_type + self.array_len = array_len def _set_self_type(self, self_type): super(Array, self)._set_self_type(self_type) - self._field_type._set_self_type(self_type) + self.field_type._set_self_type(self_type) def set(self, vm, addr, val): # MemSizedArray assignment if isinstance(val, MemSizedArray): - if val.array_len != self._array_len or len(val) != self.size(): + if val.array_len != self.array_len or len(val) != self.size(): raise ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) # list assignment elif isinstance(val, list): - if len(val) != self._array_len: + if len(val) != self.array_len: raise ValueError("Size mismatch in MemSizedArray assignment ") offset = 0 for elt in val: - self._field_type.set(vm, addr + offset, elt) - offset += self._field_type.size() + self.field_type.set(vm, addr + offset, elt) + offset += self.field_type.size() else: raise NotImplementedError( "Assignment only implemented for list and MemSizedArray") def get(self, vm, addr): - return MemSizedArray(vm, addr, self._field_type, self._array_len) + return MemSizedArray(vm, addr, self.field_type, self.array_len) def size(self): - return self._field_type.size() * self._array_len + return self.field_type.size() * self.array_len def __repr__(self): - return 
"%r[%s]" % (self._field_type, self._array_len) + return "%r[%s]" % (self.field_type, self.array_len) + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.field_type == other.field_type and \ + self.array_len == other.array_len class Union(MemField): @@ -478,6 +501,10 @@ class Union(MemField): for name, field in self.field_list) return "%s(%s)" % (self.__class__.__name__, fields_repr) + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.field_list == other.field_list + class Bits(MemField): """Helper class for BitField, not very useful on its own. Represents some @@ -533,6 +560,12 @@ class Bits(MemField): return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, self._bit_offset, self._bit_offset + self._bits) + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and self._bits == other._bits and \ + self._bit_offset == other._bit_offset + + class BitField(Union): """A C-like bitfield. @@ -580,6 +613,10 @@ class BitField(Union): def get(self, vm, addr): return self._num.get(vm, addr) + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and super(BitField, self).__eq__(other) + # MemStruct classes @@ -1101,7 +1138,7 @@ def mem_array_type(field_type): class MemSizedArray(MemArray): - """A fixed size MemArray. Its additional arg represents the @length (in + """A fixed size MemArray. Its additional arg represents the @array_len (in number of elements) of this array. This type is dynamically sized. 
Use mem_sized_array_type to generate a @@ -1109,15 +1146,15 @@ class MemSizedArray(MemArray): """ _array_len = None - def __init__(self, vm, addr=None, field_type=None, length=None): + def __init__(self, vm, addr=None, field_type=None, array_len=None): # Set the length before anything else to allow get_size() to work for # allocation if self._array_len is None: - self._array_len = length + self._array_len = array_len super(MemSizedArray, self).__init__(vm, addr, field_type) if self._array_len is None or self._field_type is None: raise NotImplementedError( - "Provide field_type and length to instanciate this class, " + "Provide field_type and array_len to instanciate this class, " "or generate a subclass with mem_sized_array_type.") @property @@ -1159,18 +1196,18 @@ class MemSizedArray(MemArray): return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) -def mem_sized_array_type(field_type, length): +def mem_sized_array_type(field_type, array_len): """Generate a MemSizedArray subclass that has a fixed @field_type and a - fixed @length. This allows to instanciate the returned type with only + fixed @array_len. This allows to instanciate the returned type with only the vm and addr arguments, as are standard MemStructs. 
""" @classmethod def sizeof(cls): return cls._field_type.size() * cls._array_len - array_type = type('MemSizedArray_%r_%s' % (field_type, length), + array_type = type('MemSizedArray_%r_%s' % (field_type, array_len), (MemSizedArray,), - {'_array_len': length, + {'_array_len': array_len, '_field_type': field_type, 'sizeof': sizeof}) return array_type diff --git a/test/analysis/mem.py b/test/analysis/mem.py index df1df9bc..a3642a4f 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -8,7 +8,8 @@ from miasm2.analysis.machine import Machine from miasm2.analysis.mem import MemStruct, Num, Ptr, MemStr, MemArray,\ MemSizedArray, Array, mem_array_type,\ mem_sized_array_type, Struct, Inline, mem,\ - Union, BitField, MemSelf, MemVoid, set_allocator + Union, BitField, MemSelf, MemVoid, Bits, \ + set_allocator from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap @@ -430,6 +431,41 @@ p.value = mstruct.get_addr() assert p.deref_value.cast(MyStruct) == mstruct assert p.cast(MemPtrMyStruct).deref_value == mstruct +# Field equality tests +assert Struct("IH") == Struct("IH") +assert Struct("I") != Struct("IH") +assert Num("I") == Num("I") +assert Num(">I") != Num("I", MyStruct) != Ptr(" Date: Tue, 24 Nov 2015 17:33:13 +0100 Subject: MemStruct: dyn types (returned by mem*()) are now cached --- miasm2/analysis/mem.py | 63 ++++++++++++++++++++++++++++++++------------------ test/analysis/mem.py | 16 ++++++++++--- 2 files changed, 53 insertions(+), 26 deletions(-) (limited to 'test') diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index da12f5ba..2e52ec1a 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -85,8 +85,11 @@ console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARN) -# allocator is a function(vm, size) -> allocated_address -allocator = None +# ALLOCATOR is a function(vm, size) -> allocated_address +ALLOCATOR = None + 
+# Cache for dynamically generated MemStructs +DYN_MEM_STRUCT_CACHE = {} def set_allocator(alloc_func): """Set an allocator for this module; allows to instanciate statically sized @@ -96,8 +99,8 @@ def set_allocator(alloc_func): Args: alloc_func: func(VmMngr) -> integer_address """ - global allocator - allocator = alloc_func + global ALLOCATOR + ALLOCATOR = alloc_func # Helpers @@ -167,14 +170,17 @@ def set_str_utf16(vm, addr, s): # MemField to MemStruct helper -# TODO: cache generated types def mem(field): """Generate a MemStruct subclass from a field. The field's value can be accessed through self.value or self.deref_value if field is a Ptr. """ + if field in DYN_MEM_STRUCT_CACHE: + return DYN_MEM_STRUCT_CACHE[field] + fields = [("value", field)] # Build a type to contain the field type mem_type = type("Mem%r" % field, (MemStruct,), {'fields': fields}) + DYN_MEM_STRUCT_CACHE[field] = mem_type return mem_type @@ -356,7 +362,7 @@ class Ptr(Num): def __hash__(self): return hash(super(Ptr, self).__hash__() + hash(self._dst_type) + - hash(self._type_args) + hash(self._type_kwargs)) + hash(self._type_args)) class Inline(MemField): @@ -408,7 +414,7 @@ class Inline(MemField): def __hash__(self): return hash(hash(self.__class__) + hash(self._il_type) + - hash(self._type_args) + hash(self._type_kwargs)) + hash(self._type_args)) class Array(MemField): @@ -521,7 +527,7 @@ class Union(MemField): self.field_list == other.field_list def __hash__(self): - return hash(hash(self.__class__) + hash(self.field_list)) + return hash(hash(self.__class__) + hash(tuple(self.field_list))) class Bits(MemField): @@ -716,14 +722,15 @@ class MemStruct(object): # Classic usage methods def __init__(self, vm, addr=None, *args, **kwargs): - global allocator + global ALLOCATOR super(MemStruct, self).__init__(*args, **kwargs) self._vm = vm if addr is None: - if allocator is None: + if ALLOCATOR is None: raise ValueError("Cannot provide None address to MemStruct() if" - "%s.allocator is not set." 
% __name__) - self._addr = allocator(vm, self.get_size()) + "%s.set_allocator has not been called." + % __name__) + self._addr = ALLOCATOR(vm, self.get_size()) else: self._addr = addr @@ -1151,17 +1158,6 @@ class MemArray(MemStruct): return "[%r, ...] [%r]" % (self[0], self._field_type) -def mem_array_type(field_type): - """Generate a MemArray subclass that has a fixed @field_type. It allows to - instanciate this class with only vm and addr argument, as are standard - MemStructs. - """ - array_type = type('MemArray_%r' % (field_type,), - (MemArray,), - {'_field_type': field_type}) - return array_type - - class MemSizedArray(MemArray): """A fixed size MemArray. Its additional arg represents the @array_len (in number of elements) of this array. @@ -1221,11 +1217,31 @@ class MemSizedArray(MemArray): return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) +def mem_array_type(field_type): + """Generate a MemArray subclass that has a fixed @field_type. It allows to + instanciate this class with only vm and addr argument, as are standard + MemStructs. + """ + cache_key = (field_type, None) + if cache_key in DYN_MEM_STRUCT_CACHE: + return DYN_MEM_STRUCT_CACHE[cache_key] + + array_type = type('MemArray_%r' % (field_type,), + (MemArray,), + {'_field_type': field_type}) + DYN_MEM_STRUCT_CACHE[cache_key] = array_type + return array_type + + def mem_sized_array_type(field_type, array_len): """Generate a MemSizedArray subclass that has a fixed @field_type and a fixed @array_len. This allows to instanciate the returned type with only the vm and addr arguments, as are standard MemStructs. 
""" + cache_key = (field_type, array_len) + if cache_key in DYN_MEM_STRUCT_CACHE: + return DYN_MEM_STRUCT_CACHE[cache_key] + @classmethod def sizeof(cls): return cls._field_type.size() * cls._array_len @@ -1235,5 +1251,6 @@ def mem_sized_array_type(field_type, array_len): {'_array_len': array_len, '_field_type': field_type, 'sizeof': sizeof}) + DYN_MEM_STRUCT_CACHE[cache_key] = array_type return array_type diff --git a/test/analysis/mem.py b/test/analysis/mem.py index a3642a4f..d0590ebc 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -292,9 +292,6 @@ assert cont.instruct.bar == 0x03 assert cont.last == 0x04 assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' -# Quick mem(MemField) test: -assert mem(Num("f"))(jitter.vm, addr) == mem(Num("f"))(jitter.vm, addr) - # Union test class UniStruct(MemStruct): @@ -464,6 +461,19 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) +# Quick mem(MemField)/MemField hash test: +assert mem(Num("f"))(jitter.vm, addr) == mem(Num("f"))(jitter.vm, addr) +# Types are cached +assert mem(Num("f")) == mem(Num("f")) +assert mem(Num("d")) != mem(Num("f")) +assert mem(Union([("f1", Num("I")), ("f2", Num("H"))])) == \ + mem(Union([("f1", Num("I")), ("f2", Num("H"))])) +assert mem_array_type(Num("B")) == mem_array_type(Num("B")) +assert mem_array_type(Num("I")) != mem_array_type(Num("B")) +assert mem_sized_array_type(Num("B"), 20) == mem_sized_array_type(Num("B"), 20) +assert mem_sized_array_type(Num("B"), 19) != mem_sized_array_type(Num("B"), 20) + + # Repr tests print "Some struct reprs:\n" -- cgit 1.4.1 From 955595f40dfa7a8bb246050dd3bbb5a4ecb67f0e Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Thu, 26 Nov 2015 14:19:34 +0100 Subject: MemStruct: MemSelf usage fix --- miasm2/analysis/mem.py | 10 +++++++++- test/analysis/mem.py | 8 +++++--- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'test') diff --git 
a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 006fecd0..dbfd335a 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -314,7 +314,13 @@ class Ptr(Num): if isinstance(dst_type, MemField): # Patch the field to propagate the MemSelf replacement dst_type._get_self_type = lambda: self._get_self_type() - dst_type = mem(dst_type) + # dst_type cannot be patched here, since _get_self_type of the outer + # class has not yet been set. Patching dst_type involves calling + # mem(dst_type), which will only return a type that does not point + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. + # In short, dst_type = mem(dst_type) is not valid here, it is done + # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args self._type_kwargs = type_kwargs @@ -325,6 +331,8 @@ class Ptr(Num): self._dst_type = self._get_self_type() else: raise ValueError("Unsupported usecase for MemSelf, sorry") + if isinstance(self._dst_type, MemField): + self._dst_type = mem(self._dst_type) @property def dst_type(self): diff --git a/test/analysis/mem.py b/test/analysis/mem.py index d0590ebc..e9c5d60b 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -183,13 +183,13 @@ assert memarray[2:4] == [3, 3] try: memarray[2:4] = [3, 3, 3] assert False, "Should raise, mismatched sizes" -except (ValueError): +except ValueError: pass try: memarray[1, 2] assert False, "Should raise, mismatched sizes" -except (ValueError): +except ValueError: pass @@ -360,12 +360,13 @@ class UnhealthyIdeas(MemStruct): ("pself", Ptr("I", MemSelf)), ("apself", Array(Ptr("I", MemSelf), 2)), ("ppself", Ptr("I", Ptr("I", MemSelf))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", MemSelf)))), ] # Other way to handle self dependency and circular dependencies # NOTE: in this case, MemSelf would have been fine UnhealthyIdeas.fields.append( - ("pppself", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) + ("pppself2", 
Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) # Regen all fields UnhealthyIdeas.gen_fields() @@ -398,6 +399,7 @@ ideas.deref_pppself.value = my_heap.vm_alloc(jitter.vm, p_size) ideas.deref_pppself.deref_value.value = ideas.get_addr() assert ideas.deref_pppself.deref_value.deref_value == ideas + # Cast tests # MemStruct cast MemInt = mem(Num("I")) -- cgit 1.4.1 From 392a8eea91d6114c0f3480118354bf7f95ed9096 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Thu, 26 Nov 2015 14:29:36 +0100 Subject: MemStruct: Struct -> RawStruct --- miasm2/analysis/mem.py | 6 +++--- test/analysis/mem.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index dbfd335a..ef41366b 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -41,7 +41,7 @@ MemStruct and define a list of (, ). Available MemField classes are: - Num: for number (float or int) handling - - Struct: abstraction over a simple struct pack/unpack + - RawStruct: abstraction over a simple struct pack/unpack - Ptr: a pointer to another MemStruct instance - Inline: include another MemStruct as a field (equivalent to having a struct field into another struct in C) @@ -240,7 +240,7 @@ class MemField(object): return not self == other -class Struct(MemField): +class RawStruct(MemField): """Dumb struct.pack/unpack field. Mainly used to factorize code. Value is a tuple corresponding to the struct @fmt passed to the constructor. @@ -268,7 +268,7 @@ class Struct(MemField): return hash((self.__class__, self._fmt)) -class Num(Struct): +class Num(RawStruct): """Represents a number (integer or float). The number is encoded with a struct-style format which must represent only one value. 
diff --git a/test/analysis/mem.py b/test/analysis/mem.py index e9c5d60b..bb5ffbe8 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -7,7 +7,7 @@ import struct from miasm2.analysis.machine import Machine from miasm2.analysis.mem import MemStruct, Num, Ptr, MemStr, MemArray,\ MemSizedArray, Array, mem_array_type,\ - mem_sized_array_type, Struct, Inline, mem,\ + mem_sized_array_type, RawStruct, Inline, mem,\ Union, BitField, MemSelf, MemVoid, Bits, \ set_allocator from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE @@ -211,10 +211,10 @@ assert memsarray[0] == 2 assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) -# Atypical fields (Struct and Array) +# Atypical fields (RawStruct and Array) class MyStruct2(MemStruct): fields = [ - ("s1", Struct("=BI")), + ("s1", RawStruct("=BI")), ("s2", Array(Num("B"), 10)), ] @@ -222,7 +222,7 @@ ms2 = MyStruct2(jitter.vm) ms2.memset('\xaa') assert len(ms2) == 15 -## Struct +## RawStruct assert len(ms2.s1) == 2 assert ms2.s1[0] == 0xaa assert ms2.s1[1] == 0xaaaaaaaa @@ -355,7 +355,7 @@ assert bit.f4_1 == 1 # Unhealthy ideas class UnhealthyIdeas(MemStruct): fields = [ - ("pastruct", Ptr("I", MemArray, Struct("=Bf"))), + ("pastruct", Ptr("I", MemArray, RawStruct("=Bf"))), ("apstr", Array(Ptr("I", MemStr), 10)), ("pself", Ptr("I", MemSelf)), ("apself", Array(Ptr("I", MemSelf), 2)), @@ -431,8 +431,8 @@ assert p.deref_value.cast(MyStruct) == mstruct assert p.cast(MemPtrMyStruct).deref_value == mstruct # Field equality tests -assert Struct("IH") == Struct("IH") -assert Struct("I") != Struct("IH") +assert RawStruct("IH") == RawStruct("IH") +assert RawStruct("I") != RawStruct("IH") assert Num("I") == Num("I") assert Num(">I") != Num(" Date: Fri, 27 Nov 2015 13:18:44 +0100 Subject: MemStruct: Big refactoring, Mem* -> Pinned* This commit is the first phase of the Type refactor. The PinnedType class has been separated from the more specific PinnedStruct class. 
--- example/jitter/memstruct.py | 48 ++--- miasm2/analysis/mem.py | 469 +++++++++++++++++++++++--------------------- test/analysis/mem.py | 112 +++++------ 3 files changed, 325 insertions(+), 304 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index a79686a8..645c019e 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -6,14 +6,14 @@ as well. """ from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, MemSelf, MemVoid, MemStr,\ - MemSizedArray, Ptr, Num, Array, set_allocator +from miasm2.analysis.mem import PinnedStruct, PinnedSelf, PinnedVoid, PinnedStr,\ + PinnedSizedArray, Ptr, Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap my_heap = heap() # And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the MemStruct constructor +# explicit address passing to the PinnedStruct constructor set_allocator(my_heap.vm_alloc) # Let's reimplement a simple C generic linked list mapped on a VmMngr! @@ -21,18 +21,18 @@ set_allocator(my_heap.vm_alloc) # All the structures and methods will use the python objects but all the data # is in fact stored in the VmMngr -class ListNode(MemStruct): +class ListNode(PinnedStruct): fields = [ # The " "u32" - class MyStruct(MemStruct): + class MyStruct(PinnedStruct): fields = [ # Integer field: just struct.pack fields with one value ("num", Num("I")), @@ -13,7 +13,7 @@ The main idea is to declare the fields of the structure in the class: # (self.deref_). Deref can be read and set. ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String - ("s", Ptr("I", MemStr)), + ("s", Ptr("I", PinnedStr)), ("i", Ptr("I", Num("I"))), ] @@ -32,18 +32,18 @@ structure will be automatically allocated in memory: # the allocator is a func(VmMngr) -> integer_address set_allocator(my_heap) -Note that some structures (e.g. 
MemStr or MemArray) do not have a static size -and cannot be allocated automatically. +Note that some structures (e.g. PinnedStr or PinnedArray) do not have a static +size and cannot be allocated automatically. As you saw previously, to use this module, you just have to inherit from -MemStruct and define a list of (, ). Available +PinnedStruct and define a list of (, ). Available Type classes are: - Num: for number (float or int) handling - RawStruct: abstraction over a simple struct pack/unpack - - Ptr: a pointer to another MemStruct instance - - Inline: include another MemStruct as a field (equivalent to having a + - Ptr: a pointer to another PinnedType instance + - FIXME: TODEL Inline: include another PinnedStruct as a field (equivalent to having a struct field into another struct in C) - Array: a fixed size array of Types (points) - Union: similar to `union` in C, list of Types at the same offset in a @@ -56,24 +56,25 @@ A Type always has a fixed size in memory. Some special memory structures are already implemented; they all are subclasses -of MemStruct with a custom implementation: - - - MemSelf: this class is just a special marker to reference a MemStruct - subclass inside itself. Works with Ptr and Array (e.g. Ptr(_, MemSelf) - for a pointer the same type as the class who uses this kind of field) - - MemVoid: empty MemStruct, placeholder to be casted to an implemented - MemStruct subclass - - MemStr: represents a string in memory; the encoding can be passed to the +of PinnedType with a custom implementation: + + - PinnedSelf: this class is just a special marker to reference a + PinnedStruct subclass inside itself. Works with Ptr and Array (e.g. 
+ Ptr(_, PinnedSelf) for a pointer the same type as the class who uses this + kind of field) + - PinnedVoid: empty PinnedType, placeholder to be casted to an implemented + PinnedType subclass + - PinnedStr: represents a string in memory; the encoding can be passed to the constructor (null terminated ascii/ansi or null terminated utf16) - - MemArray: an unsized array of Type; unsized here means that there is + - PinnedArray: an unsized array of Type; unsized here means that there is no defined sized for this array, equivalent to a int* or char*-style table in C. It cannot be allocated automatically, since it has no known size - - MemSizedArray: a sized MemArray, can be automatically allocated in memory - and allows more operations than MemArray - - mem: a function that dynamically generates a MemStruct subclass from a + - PinnedSizedArray: a sized PinnedArray, can be automatically allocated in memory + and allows more operations than PinnedArray + - pin: a function that dynamically generates a PinnedStruct subclass from a Type. This class has only one field named "value". -A MemStruct do not always have a static size (cls.sizeof()) nor a dynamic size +A PinnedType do not always have a static size (cls.sizeof()) nor a dynamic size (self.get_size()). """ @@ -87,15 +88,15 @@ log.addHandler(console_handler) log.setLevel(logging.WARN) # ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a MemStruct class attribute +# TODO: as a PinnedType class attribute ALLOCATOR = None -# Cache for dynamically generated MemStructs +# Cache for dynamically generated PinnedTypes DYN_MEM_STRUCT_CACHE = {} def set_allocator(alloc_func): """Set an allocator for this module; allows to instanciate statically sized - MemStructs (i.e. sizeof() is implemented) without specifying the address + PinnedTypes (i.e. sizeof() is implemented) without specifying the address (the object is allocated by @alloc_func in the vm. 
@alloc_func: func(VmMngr) -> integer_address @@ -166,10 +167,10 @@ def set_str_utf16(vm, addr, s): vm.set_mem(addr, s) -# Type to MemStruct helper +# Type to PinnedType helper -def mem(field): - """Generate a MemStruct subclass from a field. The field's value can +def pin(field): + """Generate a PinnedStruct subclass from a field. The field's value can be accessed through self.value or self.deref_value if field is a Ptr. @field: a Type instance. @@ -179,7 +180,7 @@ def mem(field): fields = [("value", field)] # Build a type to contain the field type - mem_type = type("Mem%r" % field, (MemStruct,), {'fields': fields}) + mem_type = type("Pinned%r" % field, (PinnedStruct,), {'fields': fields}) DYN_MEM_STRUCT_CACHE[field] = mem_type return mem_type @@ -187,7 +188,7 @@ def mem(field): # Type classes class Type(object): - """Base class to provide methods to set and get fields from virtual mem. + """Base class to provide methods to set and get fields from virtual pin. Subclasses can either override _pack and _unpack, or get and set if data serialization requires more work (see Inline implementation for an example). @@ -225,8 +226,8 @@ class Type(object): return self._self_type def _set_self_type(self, self_type): - """If this field refers to MemSelf, replace it with @self_type (a - MemStruct subclass) when using it. Generally not used outside the lib. + """If this field refers to PinnedSelf, replace it with @self_type (a + PinnedType subclass) when using it. Generally not used outside the lib. """ self._self_type = self_type @@ -289,55 +290,55 @@ class Num(RawStruct): class Ptr(Num): """Special case of number of which value indicates the address of a - MemStruct. Provides deref_ as well as when used, to set and - get the pointed MemStruct. + PinnedType. Provides deref_ as well as when used, to set and + get the pointed PinnedType. 
""" def __init__(self, fmt, dst_type, *type_args, **type_kwargs): """ @fmt: (str) Num compatible format that will be the Ptr representation in memory - @dst_type: (MemStruct or Type) the MemStruct this Ptr points to. - If a Type is given, it is transformed into a MemStruct with - mem(TheType). + @dst_type: (PinnedType or Type) the PinnedType this Ptr points to. + If a Type is given, it is transformed into a PinnedType with + pin(TheType). *type_args, **type_kwargs: arguments to pass to the the pointed - MemStruct when instanciating it (e.g. for MemStr encoding or - MemArray field_type). + PinnedType when instanciating it (e.g. for PinnedStr encoding or + PinnedArray field_type). """ if (not isinstance(dst_type, Type) and not (isinstance(dst_type, type) and - issubclass(dst_type, MemStruct)) and - not dst_type == MemSelf): - raise ValueError("dst_type of Ptr must be a MemStruct type, a " - "Type instance, the MemSelf marker or a class " + issubclass(dst_type, PinnedType)) and + not dst_type == PinnedSelf): + raise ValueError("dst_type of Ptr must be a PinnedType type, a " + "Type instance, the PinnedSelf marker or a class " "name.") super(Ptr, self).__init__(fmt) if isinstance(dst_type, Type): - # Patch the field to propagate the MemSelf replacement + # Patch the field to propagate the PinnedSelf replacement dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # mem(dst_type), which will only return a type that does not point - # on MemSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = mem(dst_type) is not valid here, it is done + # pin(dst_type), which will only return a type that does not point + # on PinnedSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaPinnedStruct. 
+ # In short, dst_type = pin(dst_type) is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args self._type_kwargs = type_kwargs def _fix_dst_type(self): - if self._dst_type == MemSelf: + if self._dst_type == PinnedSelf: if self._get_self_type() is not None: self._dst_type = self._get_self_type() else: - raise ValueError("Unsupported usecase for MemSelf, sorry") + raise ValueError("Unsupported usecase for PinnedSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = mem(self._dst_type) + self._dst_type = pin(self._dst_type) @property def dst_type(self): - """Return the type (MemStruct subtype) this Ptr points to.""" + """Return the type (PinnedType subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type @@ -348,7 +349,7 @@ class Ptr(Num): return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): - """Serializes the @val MemStruct subclass instance in @vm (VmMngr) at + """Serializes the @val PinnedType subclass instance in @vm (VmMngr) at @addr. Equivalent to a pointer dereference assignment in C. """ # Sanity check @@ -374,29 +375,29 @@ class Ptr(Num): class Inline(Type): - """Field used to inline a MemStruct in another MemStruct. Equivalent to + """Field used to inline a PinnedType in another PinnedType. Equivalent to having a struct field in a C struct. 
Concretely: - class MyStructClass(MemStruct): + class MyStructClass(PinnedStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] - class Example(MemStruct): + class Example(PinnedStruct): fields = [("mystruct", Inline(MyStructClass))] ex = Example(vm, addr) ex.mystruct.f2 = 3 # inlined structure field access ex.mystruct = MyStructClass(vm, addr2) # struct copy - It can be seen like a bridge to use a MemStruct as a Type + It can be seen like a bridge to use a PinnedStruct as a Type - TODO: make the Inline implicit when setting a field to be a MemStruct + TODO: make the Inline implicit when setting a field to be a PinnedStruct """ def __init__(self, inlined_type, *type_args, **type_kwargs): - if not issubclass(inlined_type, MemStruct): - raise ValueError("inlined type if Inline must be a MemStruct") + if not issubclass(inlined_type, PinnedStruct): + raise ValueError("inlined type if Inline must be a PinnedStruct") self._il_type = inlined_type self._type_args = type_args self._type_kwargs = type_kwargs @@ -428,16 +429,16 @@ class Array(Type): """A fixed size array (contiguous sequence) of a Type subclass elements. Similar to something like the char[10] type in C. - Getting an array field actually returns a MemSizedArray. Setting it is - possible with either a list or a MemSizedArray instance. Examples of syntax: + Getting an array field actually returns a PinnedSizedArray. Setting it is + possible with either a list or a PinnedSizedArray instance. 
Examples of syntax: - class Example(MemStruct): + class Example(PinnedStruct): fields = [("array", Array(Num("B"), 4))] mystruct = Example(vm, addr) mystruct.array[3] = 27 mystruct.array = [1, 4, 8, 9] - mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4) + mystruct.array = PinnedSizedArray(vm, addr2, Num("B"), 4) """ def __init__(self, field_type, array_len): @@ -449,17 +450,17 @@ class Array(Type): self.field_type._set_self_type(self_type) def set(self, vm, addr, val): - # MemSizedArray assignment - if isinstance(val, MemSizedArray): + # PinnedSizedArray assignment + if isinstance(val, PinnedSizedArray): if val.array_len != self.array_len or len(val) != self.size(): - raise ValueError("Size mismatch in MemSizedArray assignment") + raise ValueError("Size mismatch in PinnedSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) # list assignment elif isinstance(val, list): if len(val) != self.array_len: - raise ValueError("Size mismatch in MemSizedArray assignment ") + raise ValueError("Size mismatch in PinnedSizedArray assignment ") offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) @@ -467,10 +468,10 @@ class Array(Type): else: raise RuntimeError( - "Assignment only implemented for list and MemSizedArray") + "Assignment only implemented for list and PinnedSizedArray") def get(self, vm, addr): - return MemSizedArray(vm, addr, self.field_type, self.array_len) + return PinnedSizedArray(vm, addr, self.field_type, self.array_len) def size(self): return self.field_type.size() * self.array_len @@ -488,12 +489,12 @@ class Array(Type): class Union(Type): - """Allows to put multiple fields at the same offset in a MemStruct, similar + """Allows to put multiple fields at the same offset in a PinnedStruct, similar to unions in C. The Union will have the size of the largest of its fields. Example: - class Example(MemStruct): + class Example(PinnedStruct): fields = [("uni", Union([ ("f1", Num(". 
+ useless most of the time since fields are accessible via self.. """ if name not in self._attrs: - raise AttributeError("'%s' object has no attribute '%s'" + raise attributeerror("'%s' object has no attribute '%s'" % (self.__class__.__name__, name)) field = self._attrs[name]["field"] offset = self._attrs[name]["offset"] return field.get(self._vm, self.get_addr() + offset) def set_field(self, name, val): - """Set a field value by name. @val is the python value corresponding to + """set a field value by name. @val is the python value corresponding to this field type. - Useless most of the time since fields are accessible via self.. + useless most of the time since fields are accessible via self.. """ if name not in self._attrs: - raise AttributeError("'%s' object has no attribute '%s'" + raise attributeerror("'%s' object has no attribute '%s'" % (self.__class__.__name__, name)) field = self._attrs[name]["field"] offset = self._attrs[name]["offset"] field.set(self._vm, self.get_addr() + offset, val) def deref_field(self, name): - """Get the MemStruct pointed by field. + """get the memstruct pointed by field. - Useless most of the time since fields are accessible via + useless most of the time since fields are accessible via self.deref_. """ addr = self.get_field(name) field = self._attrs[name]["field"] assert isinstance(field, Ptr),\ - "Programming error: field should be a Ptr" + "programming error: field should be a Ptr" return field.deref_get(self._vm, addr) def set_deref_field(self, name, val): - """Set the MemStruct pointed by field. @val should be of the - type of the pointed MemStruct. The field must be a Ptr. + """set the memstruct pointed by field. @val should be of the + type of the pointed memstruct. the field must be a Ptr. - Useless most of the time since fields are accessible via + useless most of the time since fields are accessible via self.deref_. 
""" addr = self.get_field(name) field = self._attrs[name]["field"] assert isinstance(field, Ptr),\ - "Programming error: field should be a Ptr" + "programming error: field should be a Ptr" field.deref_set(self._vm, addr, val) - def memset(self, byte='\x00'): - """Fill the memory space of this MemStruct with @byte ('\x00' by - default). The size is retrieved with self.get_size() (dynamic size). - """ - # TODO: multibyte patterns - if not isinstance(byte, str) or not len(byte) == 1: - raise ValueError("byte must be a 1-lengthed str") - self._vm.set_mem(self.get_addr(), byte * self.get_size()) - - def cast(self, other_type, *type_args, **type_kwargs): - """Cast this MemStruct to another MemStruct (same address, same vm, but - different type). Return the casted MemStruct. + def cast_field(self, field, other_type, *type_args, **type_kwargs): """ - return self.cast_field(None, other_type, *type_args, **type_kwargs) - - def cast_field(self, field_name, other_type, *type_args, **type_kwargs): - """Same as cast, but the address of the returned MemStruct is the - address at which @field_name is in the current MemStruct. + @field: a field name """ - return other_type(self._vm, self.get_addr(field_name), + return other_type(self._vm, self.get_addr(field), *type_args, **type_kwargs) - def __len__(self): - return self.get_size() - - def raw(self): - """Raw binary (str) representation of the MemStruct as it is in - memory. 
- """ - attrs = sorted(self._attrs.itervalues(), key=lambda a: a["offset"]) - out = [] - for attr in attrs: - field = attr["field"] - offset = attr["offset"] - out.append(self._vm.get_mem(self.get_addr() + offset, field.size())) - return ''.join(out) - - def __str__(self): - return self.raw() - - def __repr__(self): - attrs = sorted(self._attrs.iteritems(), key=lambda a: a[1]["offset"]) - out = [] - for name, attr in attrs: - field = attr["field"] - val_repr = repr(self.get_field(name)) - if '\n' in val_repr: - val_repr = '\n' + indent(val_repr, 4) - out.append("%s: %r = %s" % (name, field, val_repr)) - return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) - - def __eq__(self, other): - return self.__class__ == other.__class__ and str(self) == str(other) - - def __ne__(self, other): - return not self == other # Field generation methods, voluntarily public to be able to regen fields # after class definition @@ -896,18 +917,18 @@ class MemStruct(object): Useful in case of a type cyclic dependency. For example, the following is not possible in python: - class A(MemStruct): + class A(PinnedStruct): fields = [("b", Ptr("I", B))] - class B(MemStruct): + class B(PinnedStruct): fields = [("a", Ptr("I", A))] With gen_fields, the following is the legal equivalent: - class A(MemStruct): + class A(PinnedStruct): pass - class B(MemStruct): + class B(PinnedStruct): fields = [("a", Ptr("I", A))] A.fields = [("b", Ptr("I", B))] @@ -962,21 +983,21 @@ class MemStruct(object): cls.gen_field(name, field, offset) -class MemSelf(MemStruct): +class PinnedSelf(PinnedStruct): """Special Marker class for reference to current class in a Ptr or Array (mostly Array of Ptr). Example: - class ListNode(MemStruct): + class ListNode(PinnedStruct): fields = [ - ("next", Ptr("). Deref can be read and set. 
("other", Ptr("I", OtherStruct)), # Ptr to a variable length String - ("s", Ptr("I", MemStr)), + ("s", Ptr("I", PinnedStr)), ("i", Ptr("I", Num("I"))), ] @@ -43,7 +43,7 @@ addr_str3 = 0x1300 jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) -# MemStruct tests +# PinnedStruct tests ## Creation # Use manual allocation with explicit addr for the first example mstruct = MyStruct(jitter.vm, addr) @@ -57,7 +57,7 @@ assert mstruct.num == 3 memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] assert memval == 3 -## Memset sets the whole structure +## Pinnedset sets the whole structure mstruct.memset() assert mstruct.num == 0 assert mstruct.flags == 0 @@ -103,7 +103,7 @@ assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address assert other == other2 # But same value -## Same stuff for Ptr to MemField +## Same stuff for Ptr to PinnedField alloc_addr = my_heap.vm_alloc(jitter.vm, mstruct.get_field_type("i").dst_type.sizeof()) mstruct.i = alloc_addr @@ -116,7 +116,7 @@ assert memval == 8 # Str tests ## Basic tests -memstr = MemStr(jitter.vm, addr_str) +memstr = PinnedStr(jitter.vm, addr_str) memstr.value = "" assert memstr.value == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' @@ -125,7 +125,7 @@ assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') assert memstr.value == 'MIAMs' -## Ptr(MemStr) manipulations +## Ptr(PinnedStr) manipulations mstruct.s = memstr.get_addr() assert mstruct.s == addr_str assert mstruct.deref_s == memstr @@ -135,25 +135,25 @@ assert mstruct.deref_s.value == "That's all folks!" assert memstr.value == "That's all folks!" ## Other address, same value, same encoding -memstr2 = MemStr(jitter.vm, addr_str2) +memstr2 = PinnedStr(jitter.vm, addr_str2) memstr2.value = "That's all folks!" 
assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding -memstr3 = MemStr(jitter.vm, addr_str3, "utf16") +memstr3 = PinnedStr(jitter.vm, addr_str3, "utf16") memstr3.value = "That's all folks!" assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Mem representation is different +assert str(memstr3) != str(memstr) # Pinned representation is different assert memstr3 != memstr # Encoding is different, so they are not eq assert memstr3.value == memstr.value # But the python value is the same -# MemArray tests +# PinnedArray tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = MemArray(jitter.vm, alloc_addr, Num("I")) +memarray = PinnedArray(jitter.vm, alloc_addr, Num("I")) # This also works: _memarray = mem_array_type(Num("I"))(jitter.vm, alloc_addr) memarray[0] = 0x02 @@ -193,8 +193,8 @@ except ValueError: pass -# MemSizedArray tests -memsarray = MemSizedArray(jitter.vm, None, Num("I"), 10) +# PinnedSizedArray tests +memsarray = PinnedSizedArray(jitter.vm, None, Num("I"), 10) # This also works: _memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm) # And mem_sized_array_type generates statically sized types @@ -212,7 +212,7 @@ assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) # Atypical fields (RawStruct and Array) -class MyStruct2(MemStruct): +class MyStruct2(PinnedStruct): fields = [ ("s1", RawStruct("=BI")), ("s2", Array(Num("B"), 10)), @@ -244,8 +244,8 @@ ms2.s2 = [1] * 10 for val in ms2.s2: assert val == 1 -### Field assignment (MemSizedArray) -array2 = MemSizedArray(jitter.vm, None, Num("B"), 10) +### Field assignment (PinnedSizedArray) +array2 = PinnedSizedArray(jitter.vm, None, Num("B"), 10) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -255,13 +255,13 @@ for val in ms2.s2: # Inline tests -class 
InStruct(MemStruct): +class InStruct(PinnedStruct): fields = [ ("foo", Num("B")), ("bar", Num("B")), ] -class ContStruct(MemStruct): +class ContStruct(PinnedStruct): fields = [ ("one", Num("B")), ("instruct", Inline(InStruct)), @@ -294,7 +294,7 @@ assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' # Union test -class UniStruct(MemStruct): +class UniStruct(PinnedStruct): fields = [ ("one", Num("B")), ("union", Union([ @@ -320,7 +320,7 @@ assert uni.instruct.bar == 0x22 # BitField test -class BitStruct(MemStruct): +class BitStruct(PinnedStruct): fields = [ ("flags", BitField(Num("H"), [ ("f1_1", 1), @@ -353,24 +353,24 @@ assert bit.f4_1 == 1 # Unhealthy ideas -class UnhealthyIdeas(MemStruct): +class UnhealthyIdeas(PinnedStruct): fields = [ - ("pastruct", Ptr("I", MemArray, RawStruct("=Bf"))), - ("apstr", Array(Ptr("I", MemStr), 10)), - ("pself", Ptr("I", MemSelf)), - ("apself", Array(Ptr("I", MemSelf), 2)), - ("ppself", Ptr("I", Ptr("I", MemSelf))), - ("pppself", Ptr("I", Ptr("I", Ptr("I", MemSelf)))), + ("pastruct", Ptr("I", PinnedArray, RawStruct("=Bf"))), + ("apstr", Array(Ptr("I", PinnedStr), 10)), + ("pself", Ptr("I", PinnedSelf)), + ("apself", Array(Ptr("I", PinnedSelf), 2)), + ("ppself", Ptr("I", Ptr("I", PinnedSelf))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", PinnedSelf)))), ] # Other way to handle self dependency and circular dependencies -# NOTE: in this case, MemSelf would have been fine +# NOTE: in this case, PinnedSelf would have been fine UnhealthyIdeas.fields.append( ("pppself2", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) # Regen all fields UnhealthyIdeas.gen_fields() -p_size = Ptr("I", MemVoid).size() +p_size = Ptr("I", PinnedVoid).size() ideas = UnhealthyIdeas(jitter.vm) ideas.memset() @@ -401,34 +401,34 @@ assert ideas.deref_pppself.deref_value.deref_value == ideas # Cast tests -# MemStruct cast -MemInt = mem(Num("I")) -MemShort = mem(Num("H")) -dword = MemInt(jitter.vm) +# PinnedStruct cast +PinnedInt = pin(Num("I")) 
+PinnedShort = pin(Num("H")) +dword = PinnedInt(jitter.vm) dword.value = 0x12345678 -assert isinstance(dword.cast(MemShort), MemShort) -assert dword.cast(MemShort).value == 0x5678 +assert isinstance(dword.cast(PinnedShort), PinnedShort) +assert dword.cast(PinnedShort).value == 0x5678 # Field cast ms2.s2[0] = 0x34 ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", MemShort).value == 0x1234 +assert ms2.cast_field("s2", PinnedShort).value == 0x1234 # Other method -assert MemShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 +assert PinnedShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert MemShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd +assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd # void* style cast -MemPtrVoid = mem(Ptr("I", MemVoid)) -MemPtrMyStruct = mem(Ptr("I", MyStruct)) -p = MemPtrVoid(jitter.vm) +PinnedPtrVoid = pin(Ptr("I", PinnedVoid)) +PinnedPtrMyStruct = pin(Ptr("I", MyStruct)) +p = PinnedPtrVoid(jitter.vm) p.value = mstruct.get_addr() assert p.deref_value.cast(MyStruct) == mstruct -assert p.cast(MemPtrMyStruct).deref_value == mstruct +assert p.cast(PinnedPtrMyStruct).deref_value == mstruct # Field equality tests assert RawStruct("IH") == RawStruct("IH") @@ -463,13 +463,13 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick mem(MemField)/MemField hash test: -assert mem(Num("f"))(jitter.vm, addr) == mem(Num("f"))(jitter.vm, addr) +# Quick pin(PinnedField)/PinnedField hash test: +assert pin(Num("f"))(jitter.vm, addr) == pin(Num("f"))(jitter.vm, addr) # Types are cached -assert mem(Num("f")) == mem(Num("f")) -assert mem(Num("d")) != mem(Num("f")) -assert mem(Union([("f1", Num("I")), ("f2", Num("H"))])) == \ - mem(Union([("f1", Num("I")), ("f2", Num("H"))])) +assert pin(Num("f")) == pin(Num("f")) +assert pin(Num("d")) != pin(Num("f")) +assert pin(Union([("f1", Num("I")), ("f2", 
Num("H"))])) == \ + pin(Union([("f1", Num("I")), ("f2", Num("H"))])) assert mem_array_type(Num("B")) == mem_array_type(Num("B")) assert mem_array_type(Num("I")) != mem_array_type(Num("B")) assert mem_sized_array_type(Num("B"), 20) == mem_sized_array_type(Num("B"), 20) @@ -485,8 +485,8 @@ print repr(cont), '\n' print repr(uni), '\n' print repr(bit), '\n' print repr(ideas), '\n' -print repr(mem(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' -print repr(mem(Num("f"))(jitter.vm, addr)), '\n' +print repr(pin(Array(Inline(MyStruct2), 2))(jitter.vm, addr)), '\n' +print repr(pin(Num("f"))(jitter.vm, addr)), '\n' print repr(memarray) print repr(memsarray) print repr(memstr) -- cgit 1.4.1 From d19f4c1dbdd2f1f451d03551abb0e5ebf4d455be Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 29 Nov 2015 22:03:11 +0100 Subject: MemStruct: big refactor in process Doc is currently incoherent, impl will also be completed --- example/jitter/memstruct.py | 29 +-- miasm2/analysis/mem.py | 480 ++++++++++++++++++++++++-------------------- test/analysis/mem.py | 246 ++++++++++++----------- 3 files changed, 407 insertions(+), 348 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 645c019e..6e8e13af 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -36,15 +36,15 @@ class ListNode(PinnedStruct): ] def get_next(self): - if self.next == 0: + if self.next.val == 0: return None - return self.deref_next + return self.next.deref def get_data(self, data_type=None): if data_type is not None: - return self.deref_data.cast(data_type) + return self.data.deref.cast(data_type) else: - return self.deref_data + return self.data.deref class LinkedList(PinnedStruct): @@ -60,12 +60,12 @@ class LinkedList(PinnedStruct): """Returns the head ListNode instance""" if self.head == 0: return None - return self.deref_head + return self.head.deref def get_tail(self): if self.tail == 0: return None - return self.deref_tail 
+ return self.tail.deref def push(self, data): # Allocate a new node @@ -112,7 +112,7 @@ class LinkedList(PinnedStruct): if not self.empty(): cur = self.get_head() while cur is not None: - yield cur.deref_data + yield cur.data.deref cur = cur.get_next() @@ -123,9 +123,9 @@ class DataArray(PinnedStruct): ("val1", Num("B")), ("val2", Num("B")), # Ptr can also be instanciated with a PinnedField as an argument, a special - # PinnedStruct containing only one field named "value" will be created, so + # PinnedStruct containing only one field named "val" will be created, so # that Ptr can point to a PinnedStruct instance. Here, - # data_array.deref_array.value will allow to access an Array + # data_array.array.deref.val will allow to access an Array ("arrayptr", Ptr(" as well as when used, to set and + PinnedType. + + FIXME: DOC + + Provides deref_ as well as when used, to set and get the pointed PinnedType. """ @@ -360,7 +364,7 @@ class Ptr(Num): else: raise ValueError("Unsupported usecase for PinnedSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = pin(self._dst_type) + self._dst_type = self._dst_type.pinned @property def dst_type(self): @@ -368,11 +372,28 @@ class Ptr(Num): self._fix_dst_type() return self._dst_type + def set(self, vm, addr, val): + if isinstance(val, PinnedType) and isinstance(val.get_type(), Ptr): + self.set_val(vm, addr, val.val) + else: + super(Ptr, self).set(vm, addr, val) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_val(self, vm, addr): + return super(Ptr, self).get(vm, addr) + + def set_val(self, vm, addr, val): + return super(Ptr, self).set(vm, addr, val) + def deref_get(self, vm, addr): """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. Equivalent to a pointer dereference rvalue in C. 
""" - return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) + dst_addr = self.get_val(vm, addr) + return self.dst_type(vm, dst_addr, + *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): """Serializes the @val PinnedType subclass instance in @vm (VmMngr) at @@ -384,7 +405,8 @@ class Ptr(Num): self._dst_type.__name__, val.__class__.__name__) # Actual job - vm.set_mem(addr, str(val)) + dst_addr = self.get_val(vm, addr) + vm.set_mem(dst_addr, str(val)) def _get_pinned_base_class(self): return PinnedPtr @@ -403,7 +425,7 @@ class Ptr(Num): self._type_args)) -class Inline(Type): +class Struct(Type): """Field used to inline a PinnedType in another PinnedType. Equivalent to having a struct field in a C struct. @@ -424,34 +446,145 @@ class Inline(Type): TODO: make the Inline implicit when setting a field to be a PinnedStruct """ - def __init__(self, inlined_type, *type_args, **type_kwargs): - if not issubclass(inlined_type, PinnedStruct): - raise ValueError("inlined type if Inline must be a PinnedStruct") - self._il_type = inlined_type - self._type_args = type_args - self._type_kwargs = type_kwargs + def __init__(self, name, fields): + self.name = name + # fields is immutable + self._fields = tuple(fields) + self._gen_fields() + + def _gen_fields(self): + """Precompute useful metadata on self.fields.""" + self._fields_desc = {} + offset = 0 + for name, field in self._fields: + # For reflexion + field._set_self_type(self) + self._gen_field(name, field, offset) + offset += field.size() + self._size = offset + + def _gen_field(self, name, field, offset): + """Generate only one field + + @name: (str) the name of the field + @field: (Type instance) the field type + @offset: (int) the offset of the field in the structure + """ + self._fields_desc[name] = {"field": field, "offset": offset} + + @property + def fields(self): + return self._fields def set(self, vm, addr, val): raw = str(val) vm.set_mem(addr, raw) def get(self, vm, addr): - 
return self._il_type(vm, addr) + return self.pinned(vm, addr) + + def get_field(self, vm, addr, name): + """get a field value by name. + + useless most of the time since fields are accessible via self.. + """ + if name not in self._fields_desc: + raise ValueError("'%s' type has no field '%s'" + % (self, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + return field.get(vm, addr + offset) + + def set_field(self, vm, addr, name, val): + """set a field value by name. @val is the python value corresponding to + this field type. + + useless most of the time since fields are accessible via self.. + """ + if name not in self._fields_desc: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + field.set(vm, addr + offset, val) def size(self): - return self._il_type.sizeof() + # Child classes can set self._size if their size is not the sum of + # their fields + return sum(a["field"].size() for a in self._fields_desc.itervalues()) + + def get_offset(self, field_name): + """ + @field_name: (str, optional) the name of the field to get the + offset of + """ + if field_name not in self._fields_desc: + raise ValueError("This structure has no %s field" % field_name) + return self._fields_desc[field_name]['offset'] + + def get_field_type(self, name): + """return the type subclass instance describing field @name.""" + # TODO: move it to Struct + return self._fields_desc[name]['field'] + + #def _build_pinned_type(self): + # mem_type = type("PinnedStruct%s" % self.name, + # (PinnedStruct,), + # {'_type': self}) + # return mem_type + + def _get_pinned_base_class(self): + return PinnedStruct def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self._il_type) + return "Struct%s" % self.name def __eq__(self, other): return self.__class__ == other.__class__ and \ - self._il_type == other._il_type and \ - self._type_args == other._type_args 
and \ - self._type_kwargs == other._type_kwargs + self.fields == other.fields and \ + self.name == other.name def __hash__(self): - return hash((self.__class__, self._il_type, self._type_args)) + # Only hash name, not fields, because if a field is a Ptr to this + # Struct type, an infinite loop occurs + return hash((self.__class__, self.name)) + + +class Union(Struct): + """Allows to put multiple fields at the same offset in a PinnedStruct, similar + to unions in C. The Union will have the size of the largest of its fields. + + Example: + + class Example(PinnedStruct): + fields = [("uni", Union([ + ("f1", Num(". """ - if name not in self._attrs: - raise attributeerror("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self._attrs[name]["field"] - offset = self._attrs[name]["offset"] - return field.get(self._vm, self.get_addr() + offset) + return self._type.get_field(self._vm, self.get_addr(), name) def set_field(self, name, val): """set a field value by name. @val is the python value corresponding to @@ -941,37 +1001,7 @@ class PinnedStruct(PinnedType): useless most of the time since fields are accessible via self.. """ - if name not in self._attrs: - raise attributeerror("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self._attrs[name]["field"] - offset = self._attrs[name]["offset"] - field.set(self._vm, self.get_addr() + offset, val) - - def deref_field(self, name): - """get the memstruct pointed by field. - - useless most of the time since fields are accessible via - self.deref_. - """ - addr = self.get_field(name) - field = self._attrs[name]["field"] - assert isinstance(field, Ptr),\ - "programming error: field should be a Ptr" - return field.deref_get(self._vm, addr) - - def set_deref_field(self, name, val): - """set the memstruct pointed by field. @val should be of the - type of the pointed memstruct. the field must be a Ptr. 
- - useless most of the time since fields are accessible via - self.deref_. - """ - addr = self.get_field(name) - field = self._attrs[name]["field"] - assert isinstance(field, Ptr),\ - "programming error: field should be a Ptr" - field.deref_set(self._vm, addr, val) + return self._type.set_field(self._vm, self.get_addr(), name, val) def cast_field(self, field, other_type, *type_args, **type_kwargs): """ @@ -981,7 +1011,7 @@ class PinnedStruct(PinnedType): *type_args, **type_kwargs) - # Field generation methods, voluntarily public to be able to regen fields + # Field generation methods, voluntarily public to be able to gen fields # after class definition @classmethod @@ -1006,56 +1036,59 @@ class PinnedStruct(PinnedType): class B(PinnedStruct): fields = [("a", Ptr("I", A))] - A.fields = [("b", Ptr("I", B))] - a.gen_field() + A.gen_fields([("b", Ptr("I", B))]) """ - if fields is None: - fields = cls.fields - cls._attrs = {} - offset = 0 - for name, field in cls.fields: - # For reflexion - field._set_self_type(cls) - cls.gen_field(name, field, offset) - offset += field.size() - cls._size = offset + if fields is not None: + if cls.fields is not None: + raise ValueError("Cannot regen fields of a class. Setting " + "cls.fields at class definition and calling " + "gen_fields are mutually exclusive.") + cls.fields = fields + + if cls._type is None: + if cls.fields is None: + raise ValueError("Cannot create a PinnedStruct subclass without" + " a cls._type or a cls.fields") + cls._type = cls._gen_type(cls.fields) + + if cls._type in DYN_MEM_STRUCT_CACHE: + # FIXME: Maybe a warning would be better? + raise RuntimeError("Another PinnedType has the same type as this " + "one. 
Use it instead.") + + # Register this class so that another one will not be created when + # calling cls._type.pinned + DYN_MEM_STRUCT_CACHE[cls._type] = cls + + cls._gen_attributes() @classmethod - def gen_field(cls, name, field, offset): - """Generate only one field - - @name: (str) the name of the field - @field: (Type instance) the field type - @offset: (int) the offset of the field in the structure - """ - cls._gen_simple_attr(name, field, offset) - if isinstance(field, Union): - cls._gen_union_attr(field, offset) + def _gen_attributes(cls): + # Generate self. getter and setters + for name, field in cls._type.fields: + setattr(cls, name, property( + lambda self, name=name: self.get_field(name), + lambda self, val, name=name: self.set_field(name, val) + )) @classmethod - def _gen_simple_attr(cls, name, field, offset): - cls._attrs[name] = {"field": field, "offset": offset} - - # Generate self. getter and setter - setattr(cls, name, property( - lambda self: self.get_field(name), - lambda self, val: self.set_field(name, val) - )) - - # Generate self.deref_ getter and setter if this field is a - # Ptr - if isinstance(field, Ptr): - setattr(cls, "deref_%s" % name, property( - lambda self: self.deref_field(name), - lambda self, val: self.set_deref_field(name, val) - )) + def _gen_type(cls, fields): + return Struct(cls.__name__, fields) + + def __repr__(self): + out = [] + for name, field in self._type.fields: + val_repr = repr(self.get_field(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + +class PinnedUnion(PinnedStruct): @classmethod - def _gen_union_attr(cls, union_field, offset): - if not isinstance(union_field, Union): - raise ValueError("field should be an Union instance") - for name, field in union_field.field_list: - cls.gen_field(name, field, offset) + def _gen_type(cls, fields): + return Union(fields) class 
PinnedSelf(PinnedStruct): @@ -1069,19 +1102,30 @@ class PinnedSelf(PinnedStruct): ("data", Ptr("). Deref can be read and set. + # TODO: comment ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String ("s", Ptr("I", PinnedStr)), @@ -61,15 +60,15 @@ assert memval == 3 mstruct.memset() assert mstruct.num == 0 assert mstruct.flags == 0 -assert mstruct.other == 0 -assert mstruct.s == 0 -assert mstruct.i == 0 +assert mstruct.other.val == 0 +assert mstruct.s.val == 0 +assert mstruct.i.val == 0 mstruct.memset('\x11') assert mstruct.num == 0x11111111 assert mstruct.flags == 0x11 -assert mstruct.other == 0x11111111 -assert mstruct.s == 0x11111111 -assert mstruct.i == 0x11111111 +assert mstruct.other.val == 0x11111111 +assert mstruct.s.val == 0x11111111 +assert mstruct.i.val == 0x11111111 # From now, just use heap.vm_alloc @@ -85,19 +84,21 @@ other.foo = 0x1234 assert other.foo == 0x1234 ## Basic usage -mstruct.other = other.get_addr() -assert mstruct.other == other.get_addr() -assert mstruct.deref_other == other -assert mstruct.deref_other.foo == 0x1234 +mstruct.other.val = other.get_addr() +# This also works for now: +# mstruct.other = other.get_addr() +assert mstruct.other.val == other.get_addr() +assert mstruct.other.deref == other +assert mstruct.other.deref.foo == 0x1234 ## Deref assignment other2 = OtherStruct(jitter.vm) other2.foo = 0xbeef -assert mstruct.deref_other != other2 -mstruct.deref_other = other2 -assert mstruct.deref_other == other2 -assert mstruct.deref_other.foo == 0xbeef -assert mstruct.other == other.get_addr() # Addr did not change +assert mstruct.other.deref != other2 +mstruct.other.deref = other2 +assert mstruct.other.deref == other2 +assert mstruct.other.deref.foo == 0xbeef +assert mstruct.other.val == other.get_addr() # Addr did not change assert other.foo == 0xbeef # Deref assignment copies by value assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address @@ -105,11 +106,12 @@ assert other == 
other2 # But same value ## Same stuff for Ptr to PinnedField alloc_addr = my_heap.vm_alloc(jitter.vm, - mstruct.get_field_type("i").dst_type.sizeof()) + mstruct.get_type().get_field_type("i") + .dst_type.sizeof()) mstruct.i = alloc_addr -mstruct.deref_i.value = 8 -assert mstruct.deref_i.value == 8 -assert mstruct.i == alloc_addr +mstruct.i.deref.val = 8 +assert mstruct.i.deref.val == 8 +assert mstruct.i.val == alloc_addr memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] assert memval == 8 @@ -117,37 +119,37 @@ assert memval == 8 # Str tests ## Basic tests memstr = PinnedStr(jitter.vm, addr_str) -memstr.value = "" -assert memstr.value == "" +memstr.val = "" +assert memstr.val == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' -memstr.value = "lala" +memstr.val = "lala" assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') -assert memstr.value == 'MIAMs' +assert memstr.val == 'MIAMs' ## Ptr(PinnedStr) manipulations -mstruct.s = memstr.get_addr() -assert mstruct.s == addr_str -assert mstruct.deref_s == memstr -assert mstruct.deref_s.value == 'MIAMs' -mstruct.deref_s.value = "That's all folks!" -assert mstruct.deref_s.value == "That's all folks!" -assert memstr.value == "That's all folks!" +mstruct.s.val = memstr.get_addr() +assert mstruct.s.val == addr_str +assert mstruct.s.deref == memstr +assert mstruct.s.deref.val == 'MIAMs' +mstruct.s.deref.val = "That's all folks!" +assert mstruct.s.deref.val == "That's all folks!" +assert memstr.val == "That's all folks!" ## Other address, same value, same encoding memstr2 = PinnedStr(jitter.vm, addr_str2) -memstr2.value = "That's all folks!" +memstr2.val = "That's all folks!" assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding memstr3 = PinnedStr(jitter.vm, addr_str3, "utf16") -memstr3.value = "That's all folks!" +memstr3.val = "That's all folks!" 
assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different assert str(memstr3) != str(memstr) # Pinned representation is different assert memstr3 != memstr # Encoding is different, so they are not eq -assert memstr3.value == memstr.value # But the python value is the same +assert memstr3.val == memstr.val # But the python value is the same # PinnedArray tests @@ -254,7 +256,7 @@ for val in ms2.s2: assert val == 2 -# Inline tests +# Inlining a PinnedType tests class InStruct(PinnedStruct): fields = [ ("foo", Num("B")), @@ -264,7 +266,7 @@ class InStruct(PinnedStruct): class ContStruct(PinnedStruct): fields = [ ("one", Num("B")), - ("instruct", Inline(InStruct)), + ("instruct", InStruct.get_type()), ("last", Num("B")), ] @@ -298,7 +300,7 @@ class UniStruct(PinnedStruct): fields = [ ("one", Num("B")), ("union", Union([ - ("instruct", Inline(InStruct)), + ("instruct", InStruct.get_type()), ("i", Num(">I")), ])), ("last", Num("B")), @@ -308,20 +310,21 @@ uni = UniStruct(jitter.vm) jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 assert uni.one == 0x00 -assert uni.instruct.foo == 0x01 -assert uni.instruct.bar == 0x02 -assert uni.i == 0x01020304 +assert uni.union.instruct.foo == 0x01 +assert uni.union.instruct.bar == 0x02 +assert uni.union.i == 0x01020304 assert uni.last == 0x05 -uni.instruct.foo = 0x02 -assert uni.i == 0x02020304 -uni.i = 0x11223344 -assert uni.instruct.foo == 0x11 -assert uni.instruct.bar == 0x22 +uni.union.instruct.foo = 0x02 +assert uni.union.i == 0x02020304 +uni.union.i = 0x11223344 +assert uni.union.instruct.foo == 0x11 +assert uni.union.instruct.bar == 0x22 # BitField test -class BitStruct(PinnedStruct): +class BitStruct(PinnedUnion): fields = [ + ("flags_num", Num("H")), ("flags", BitField(Num("H"), [ ("f1_1", 1), ("f2_5", 5), @@ -332,24 +335,24 @@ class BitStruct(PinnedStruct): bit = BitStruct(jitter.vm) 
bit.memset() -assert bit.flags == 0 -assert bit.f1_1 == 0 -assert bit.f2_5 == 0 -assert bit.f3_8 == 0 -assert bit.f4_1 == 0 -bit.f1_1 = 1 -bit.f2_5 = 0b10101 -bit.f3_8 = 0b10000001 -assert bit.flags == 0b0010000001101011 -assert bit.f1_1 == 1 -assert bit.f2_5 == 0b10101 -assert bit.f3_8 == 0b10000001 -assert bit.f4_1 == 0 -bit.flags = 0b1101010101011100 -assert bit.f1_1 == 0 -assert bit.f2_5 == 0b01110 -assert bit.f3_8 == 0b01010101 -assert bit.f4_1 == 1 +assert bit.flags_num == 0 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0 +assert bit.flags.f3_8 == 0 +assert bit.flags.f4_1 == 0 +bit.flags.f1_1 = 1 +bit.flags.f2_5 = 0b10101 +bit.flags.f3_8 = 0b10000001 +assert bit.flags_num == 0b0010000001101011 +assert bit.flags.f1_1 == 1 +assert bit.flags.f2_5 == 0b10101 +assert bit.flags.f3_8 == 0b10000001 +assert bit.flags.f4_1 == 0 +bit.flags_num = 0b1101010101011100 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0b01110 +assert bit.flags.f3_8 == 0b01010101 +assert bit.flags.f4_1 == 1 # Unhealthy ideas @@ -363,72 +366,83 @@ class UnhealthyIdeas(PinnedStruct): ("pppself", Ptr("I", Ptr("I", Ptr("I", PinnedSelf)))), ] -# Other way to handle self dependency and circular dependencies -# NOTE: in this case, PinnedSelf would have been fine -UnhealthyIdeas.fields.append( - ("pppself2", Ptr("I", Ptr("I", Ptr("I", UnhealthyIdeas))))) -# Regen all fields -UnhealthyIdeas.gen_fields() - p_size = Ptr("I", PinnedVoid).size() ideas = UnhealthyIdeas(jitter.vm) ideas.memset() ideas.pself = ideas.get_addr() -assert ideas == ideas.deref_pself +assert ideas == ideas.pself.deref ideas.apself[0] = ideas.get_addr() -assert ideas.apself.deref_get(0) == ideas +assert ideas.apself[0].deref == ideas ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.apself.deref_set(1, ideas) +ideas.apself[1].deref = ideas assert ideas.apself[1] != ideas.get_addr() -assert ideas.apself.deref_get(1) == ideas +assert ideas.apself[1].deref == ideas ideas.ppself = 
my_heap.vm_alloc(jitter.vm, p_size) -ideas.deref_ppself.value = ideas.get_addr() -assert ideas.deref_ppself.value == ideas.get_addr() -assert ideas.deref_ppself.deref_value == ideas +ideas.ppself.deref.val = ideas.get_addr() +assert ideas.ppself.deref.val == ideas.get_addr() +assert ideas.ppself.deref.deref == ideas -ideas.deref_ppself.value = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.deref_ppself.deref_value = ideas -assert ideas.deref_ppself.value != ideas.get_addr() -assert ideas.deref_ppself.deref_value == ideas +ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.ppself.deref.deref = ideas +assert ideas.ppself.deref.val != ideas.get_addr() +assert ideas.ppself.deref.deref == ideas ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.deref_pppself.value = my_heap.vm_alloc(jitter.vm, p_size) -ideas.deref_pppself.deref_value.value = ideas.get_addr() -assert ideas.deref_pppself.deref_value.deref_value == ideas +ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.deref.val = ideas.get_addr() +assert ideas.pppself.deref.deref.deref == ideas + + +# Circular dependencies +class A(PinnedStruct): + pass + +class B(PinnedStruct): + fields = [("a", Ptr("I", A)),] + +# Gen A's fields after declaration +A.gen_fields([("b", Ptr("I", B)),]) + +a = A(jitter.vm) +b = B(jitter.vm) +a.b.val = b.get_addr() +b.a.val = a.get_addr() +assert a.b.deref == b +assert b.a.deref == a # Cast tests # PinnedStruct cast -PinnedInt = pin(Num("I")) -PinnedShort = pin(Num("H")) +PinnedInt = Num("I").pinned +PinnedShort = Num("H").pinned dword = PinnedInt(jitter.vm) -dword.value = 0x12345678 +dword.val = 0x12345678 assert isinstance(dword.cast(PinnedShort), PinnedShort) -assert dword.cast(PinnedShort).value == 0x5678 +assert dword.cast(PinnedShort).val == 0x5678 # Field cast ms2.s2[0] = 0x34 ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", PinnedShort).value == 0x1234 +assert ms2.cast_field("s2", PinnedShort).val 
== 0x1234 # Other method -assert PinnedShort(jitter.vm, ms2.get_addr("s2")).value == 0x1234 +assert PinnedShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).value == 0xabcd +assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).val == 0xabcd # void* style cast -PinnedPtrVoid = pin(Ptr("I", PinnedVoid)) -PinnedPtrMyStruct = pin(Ptr("I", MyStruct)) +PinnedPtrVoid = Ptr("I", PinnedVoid).pinned +PinnedPtrMyStruct = Ptr("I", MyStruct).pinned p = PinnedPtrVoid(jitter.vm) -p.value = mstruct.get_addr() -assert p.deref_value.cast(MyStruct) == mstruct -assert p.cast(PinnedPtrMyStruct).deref_value == mstruct +p.val = mstruct.get_addr() +assert p.deref.cast(MyStruct) == mstruct +assert p.cast(PinnedPtrMyStruct).deref == mstruct # Field equality tests assert RawStruct("IH") == RawStruct("IH") @@ -438,11 +452,19 @@ assert Num(">I") != Num("I", MyStruct) != Ptr(" Date: Sun, 29 Nov 2015 22:07:54 +0100 Subject: MemStruct: No more 'pin' and 'mem_sized_arraytype' functions --- miasm2/analysis/mem.py | 58 +++++++------------------------------------------- test/analysis/mem.py | 10 ++++----- 2 files changed, 13 insertions(+), 55 deletions(-) (limited to 'test') diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 2b2b3a72..ce742dcc 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -71,8 +71,6 @@ of PinnedType with a custom implementation: in C. It cannot be allocated automatically, since it has no known size - PinnedSizedArray: a sized PinnedArray, can be automatically allocated in memory and allows more operations than PinnedArray - - pin: a function that dynamically generates a PinnedStruct subclass from a - Type. This class has only one field named "val". A PinnedType do not always have a static size (cls.sizeof()) nor a dynamic size (self.get_size()). 
@@ -167,28 +165,10 @@ def set_str_utf16(vm, addr, s): vm.set_mem(addr, s) -# Type to PinnedType helper - -def pin(field): - """Generate a PinnedStruct subclass from a field. The field's value can - be accessed through self.val or self.deref_val if field is a Ptr. - - @field: a Type instance. - """ - if field in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[field] - - fields = [("val", field)] - # Build a type to contain the field type - mem_type = type("Pinned%r" % field, (PinnedStruct,), {'fields': fields}) - DYN_MEM_STRUCT_CACHE[field] = mem_type - return mem_type - - # Type classes class Type(object): - """Base class to provide methods to set and get fields from virtual pin. + """Base class to provide methods to set and get fields from virtual mem. Subclasses can either override _pack and _unpack, or get and set if data serialization requires more work (see Inline implementation for an example). @@ -330,7 +310,7 @@ class Ptr(Num): in memory @dst_type: (PinnedType or Type) the PinnedType this Ptr points to. If a Type is given, it is transformed into a PinnedType with - pin(TheType). + TheType.pinned. *type_args, **type_kwargs: arguments to pass to the the pointed PinnedType when instanciating it (e.g. for PinnedStr encoding or PinnedArray field_type). @@ -348,10 +328,10 @@ class Ptr(Num): dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # pin(dst_type), which will only return a type that does not point + # dst_type.pinned, which will only return a type that does not point # on PinnedSelf but on the right class only when _get_self_type of the # outer class has been replaced by _MetaPinnedStruct. 
- # In short, dst_type = pin(dst_type) is not valid here, it is done + # In short, dst_type = dst_type.pinned is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args @@ -1307,8 +1287,8 @@ class PinnedSizedArray(PinnedArray): """A fixed size PinnedArray. Its additional arg represents the @array_len (in number of elements) of this array. - This type is dynamically sized. Use mem_sized_array_type to generate a - fixed @field_type and @array_len array which has a static size. + This type is dynamically sized. Generate a fixed @field_type and @array_len + array which has a static size by using Array(type, size).pinned. """ _array_len = None @@ -1321,7 +1301,7 @@ class PinnedSizedArray(PinnedArray): if self._array_len is None or self._field_type is None: raise NotImplementedError( "Provide field_type and array_len to instanciate this class, " - "or generate a subclass with mem_sized_array_type.") + "or generate a subclass with Array(type, size).pinned.") @property def array_len(self): @@ -1330,7 +1310,7 @@ class PinnedSizedArray(PinnedArray): def sizeof(cls): raise ValueError("PinnedSizedArray is not statically sized. Use " - "mem_sized_array_type to generate a type that is.") + "Array(type, size).pinned to generate a type that is.") def get_size(self): return self._array_len * self._field_type.size() @@ -1384,25 +1364,3 @@ def mem_array_type(field_type): DYN_MEM_STRUCT_CACHE[cache_key] = array_type return array_type - -def mem_sized_array_type(field_type, array_len): - """Generate a PinnedSizedArray subclass that has a fixed @field_type and a - fixed @array_len. This allows to instanciate the returned type with only - the vm and addr arguments, as are standard PinnedTypes. 
- """ - cache_key = (field_type, array_len) - if cache_key in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[cache_key] - - @classmethod - def sizeof(cls): - return cls._field_type.size() * cls._array_len - - array_type = type('PinnedSizedArray_%r_%s' % (field_type, array_len), - (PinnedSizedArray,), - {'_array_len': array_len, - '_field_type': field_type, - 'sizeof': sizeof}) - DYN_MEM_STRUCT_CACHE[cache_key] = array_type - return array_type - diff --git a/test/analysis/mem.py b/test/analysis/mem.py index dca3346b..60d9c569 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -7,9 +7,9 @@ import struct from miasm2.analysis.machine import Machine from miasm2.analysis.mem import PinnedStruct, Num, Ptr, PinnedStr, PinnedArray,\ PinnedSizedArray, Array, mem_array_type,\ - mem_sized_array_type, RawStruct, pin,\ - Union, BitField, PinnedSelf, PinnedVoid, Bits, \ - set_allocator, PinnedUnion, Struct + RawStruct, Union, BitField, PinnedSelf, \ + PinnedVoid, Bits, set_allocator, PinnedUnion, \ + Struct from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap @@ -198,8 +198,8 @@ except ValueError: # PinnedSizedArray tests memsarray = PinnedSizedArray(jitter.vm, None, Num("I"), 10) # This also works: -_memsarray = mem_sized_array_type(Num("I"), 10)(jitter.vm) -# And mem_sized_array_type generates statically sized types +_memsarray = Array(Num("I"), 10).pinned(jitter.vm) +# And Array(type, size).pinned generates statically sized types assert _memsarray.sizeof() == len(memsarray) memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc -- cgit 1.4.1 From 2b77be65a2810900898582f8a78d7d8a51acfe35 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 10:06:35 +0100 Subject: MemStruct: Array/PinnedArray homogeneity Array access logic has moved to Array, Pinned(Sized)Array just contains the logic to interface with memory --- example/jitter/memstruct.py | 2 +- miasm2/analysis/mem.py | 208 
++++++++++++++++---------------------------- test/analysis/mem.py | 32 +++---- 3 files changed, 84 insertions(+), 158 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 6e8e13af..038622ba 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -126,7 +126,7 @@ class DataArray(PinnedStruct): # PinnedStruct containing only one field named "val" will be created, so # that Ptr can point to a PinnedStruct instance. Here, # data_array.array.deref.val will allow to access an Array - ("arrayptr", Ptr("= self.size()): + raise IndexError("Index %s out of bounds" % idx) + + def _get_pinned_base_class(self): + if self.is_sized(): + return PinnedSizedArray + else: + return PinnedArray def __repr__(self): return "%r[%s]" % (self.field_type, self.array_len) @@ -891,7 +930,9 @@ class PinnedType(object): return "Pinned%r" % self._type def __eq__(self, other): - return self.__class__ == other.__class__ and str(self) == str(other) + return self.__class__ == other.__class__ and \ + self.get_type() == other.get_type() and \ + str(self) == str(other) def __ne__(self, other): return not self == other @@ -1200,87 +1241,34 @@ class PinnedArray(PinnedType): Such a generated type can be instanciated with only vm and addr, as are other PinnedTypes. """ - _field_type = None - - def __init__(self, vm, addr=None, field_type=None): - if self._field_type is None: - self._field_type = field_type - if self._field_type is None: - raise NotImplementedError( - "Provide field_type to instanciate this class, " - "or generate a subclass with mem_array_type.") - # FIXME: use underlying Array type - super(PinnedArray, self).__init__(vm, addr, - Array(self._field_type, None)) @property def field_type(self): """Return the Type subclass instance that represents the type of this PinnedArray items. 
""" - return self._field_type - - def _normalize_idx(self, idx): - # Noop for this type - return idx - - def _normalize_slice(self, slice_): - start = slice_.start if slice_.start is not None else 0 - stop = slice_.stop if slice_.stop is not None else self.get_size() - step = slice_.step if slice_.step is not None else 1 - return slice(start, stop, step) + return self.get_type().field_type - def _check_bounds(self, idx): - idx = self._normalize_idx(idx) - if not isinstance(idx, (int, long)): - raise ValueError("index must be an int or a long") - if idx < 0: - raise IndexError("Index %s out of bounds" % idx) - - def index2addr(self, idx): - """Return the address corresponding to a given @index in this PinnedArray. - """ - self._check_bounds(idx) - addr = self.get_addr() + idx * self._field_type.size() - return addr + def get_addr(self, idx=0): + return self._addr + self.get_type().get_offset(idx) def __getitem__(self, idx): - if isinstance(idx, slice): - res = [] - idx = self._normalize_slice(idx) - for i in xrange(idx.start, idx.stop, idx.step): - res.append(self._field_type.get(self._vm, self.index2addr(i))) - return res - else: - return self._field_type.get(self._vm, self.index2addr(idx)) + return self.get_type().get_item(self._vm, self._addr, idx) def __setitem__(self, idx, item): - if isinstance(idx, slice): - idx = self._normalize_slice(idx) - if len(item) != len(xrange(idx.start, idx.stop, idx.step)): - raise ValueError("Mismatched lengths in slice assignment") - # TODO: izip - for i, val in zip(xrange(idx.start, idx.stop, idx.step), item): - self._field_type.set(self._vm, self.index2addr(i), val) - else: - self._field_type.set(self._vm, self.index2addr(idx), item) + self.get_type().set_item(self._vm, self._addr, idx, item) # just a shorthand def as_mem_str(self, encoding="ansi"): return self.cast(PinnedStr, encoding) - @classmethod - def sizeof(cls): - raise ValueError("%s is unsized, it has no static size (sizeof). " - "Use PinnedSizedArray instead." 
% cls) - def raw(self): raise ValueError("%s is unsized, which prevents from getting its full " "raw representation. Use PinnedSizedArray instead." % self.__class__) def __repr__(self): - return "[%r, ...] [%r]" % (self[0], self._field_type) + return "[%r, ...] [%r]" % (self[0], self.field_type) class PinnedSizedArray(PinnedArray): @@ -1290,44 +1278,17 @@ class PinnedSizedArray(PinnedArray): This type is dynamically sized. Generate a fixed @field_type and @array_len array which has a static size by using Array(type, size).pinned. """ - _array_len = None - - def __init__(self, vm, addr=None, field_type=None, array_len=None): - # Set the length before anything else to allow get_size() to work for - # allocation - if self._array_len is None: - self._array_len = array_len - super(PinnedSizedArray, self).__init__(vm, addr, field_type) - if self._array_len is None or self._field_type is None: - raise NotImplementedError( - "Provide field_type and array_len to instanciate this class, " - "or generate a subclass with Array(type, size).pinned.") @property def array_len(self): """The length, in number of elements, of this array.""" - return self._array_len - - def sizeof(cls): - raise ValueError("PinnedSizedArray is not statically sized. 
Use " - "Array(type, size).pinned to generate a type that is.") + return self.get_type().array_len def get_size(self): - return self._array_len * self._field_type.size() - - def _normalize_idx(self, idx): - if idx < 0: - return self.get_size() - idx - return idx - - def _check_bounds(self, idx): - if not isinstance(idx, int) and not isinstance(idx, long): - raise ValueError("index must be an int or a long") - if idx < 0 or idx >= self.get_size(): - raise IndexError("Index %s out of bounds" % idx) + return self.get_type().size() def __iter__(self): - for i in xrange(self._array_len): + for i in xrange(self.get_type().array_len): yield self[i] def raw(self): @@ -1335,32 +1296,9 @@ class PinnedSizedArray(PinnedArray): def __repr__(self): item_reprs = [repr(item) for item in self] - if self._array_len > 0 and '\n' in item_reprs[0]: + if self.array_len > 0 and '\n' in item_reprs[0]: items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' else: items = ', '.join(item_reprs) - return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) - - def __eq__(self, other): - # Special implementation to handle dynamic subclasses - return isinstance(other, PinnedSizedArray) and \ - self._field_type == other._field_type and \ - self._array_len == other._array_len and \ - str(self) == str(other) - - -def mem_array_type(field_type): - """Generate a PinnedArray subclass that has a fixed @field_type. It allows to - instanciate this class with only vm and addr argument, as are standard - PinnedTypes. 
- """ - cache_key = (field_type, None) - if cache_key in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[cache_key] - - array_type = type('PinnedArray_%r' % (field_type,), - (PinnedArray,), - {'_field_type': field_type}) - DYN_MEM_STRUCT_CACHE[cache_key] = array_type - return array_type + return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 60d9c569..e1a2861f 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -6,10 +6,9 @@ import struct from miasm2.analysis.machine import Machine from miasm2.analysis.mem import PinnedStruct, Num, Ptr, PinnedStr, PinnedArray,\ - PinnedSizedArray, Array, mem_array_type,\ - RawStruct, Union, BitField, PinnedSelf, \ - PinnedVoid, Bits, set_allocator, PinnedUnion, \ - Struct + PinnedSizedArray, Array, RawStruct, Union, \ + BitField, PinnedSelf, PinnedVoid, Bits, \ + set_allocator, PinnedUnion, Struct from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap @@ -155,9 +154,7 @@ assert memstr3.val == memstr.val # But the python value is the same # PinnedArray tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = PinnedArray(jitter.vm, alloc_addr, Num("I")) -# This also works: -_memarray = mem_array_type(Num("I"))(jitter.vm, alloc_addr) +memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -188,19 +185,10 @@ try: except ValueError: pass -try: - memarray[1, 2] - assert False, "Should raise, mismatched sizes" -except ValueError: - pass - -# PinnedSizedArray tests -memsarray = PinnedSizedArray(jitter.vm, None, Num("I"), 10) -# This also works: -_memsarray = Array(Num("I"), 10).pinned(jitter.vm) +memsarray = Array(Num("I"), 10).pinned(jitter.vm) # And Array(type, size).pinned generates statically sized types -assert _memsarray.sizeof() == len(memsarray) 
+assert memsarray.sizeof() == Num("I").size() * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc assert len(memsarray) == 10 * 4 @@ -247,7 +235,7 @@ for val in ms2.s2: assert val == 1 ### Field assignment (PinnedSizedArray) -array2 = PinnedSizedArray(jitter.vm, None, Num("B"), 10) +array2 = Array(Num("B"), 10).pinned(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -434,7 +422,7 @@ assert PinnedShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert PinnedShort(jitter.vm, ms2.s2.index2addr(4)).val == 0xabcd +assert PinnedShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast PinnedPtrVoid = Ptr("I", PinnedVoid).pinned @@ -492,8 +480,8 @@ assert Num("f").pinned == Num("f").pinned assert Num("d").pinned != Num("f").pinned assert Union([("f1", Num("I")), ("f2", Num("H"))]).pinned == \ Union([("f1", Num("I")), ("f2", Num("H"))]).pinned -assert mem_array_type(Num("B")) == mem_array_type(Num("B")) -assert mem_array_type(Num("I")) != mem_array_type(Num("B")) +assert Array(Num("B")).pinned == Array(Num("B")).pinned +assert Array(Num("I")).pinned != Array(Num("B")).pinned assert Array(Num("B"), 20).pinned == Array(Num("B"), 20).pinned assert Array(Num("B"), 19).pinned != Array(Num("B"), 20).pinned -- cgit 1.4.1 From f21429370a65504745290c39ecb8113163976232 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 11:00:48 +0100 Subject: MemStruct: Str type --- example/jitter/memstruct.py | 20 ++++----- miasm2/analysis/mem.py | 99 +++++++++++++++++++++++++++------------------ test/analysis/mem.py | 34 ++++++++-------- 3 files changed, 86 insertions(+), 67 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 038622ba..5472798d 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -6,8 +6,8 @@ as well. 
""" from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import PinnedStruct, PinnedSelf, PinnedVoid, PinnedStr,\ - PinnedSizedArray, Ptr, Num, Array, set_allocator +from miasm2.analysis.mem import PinnedStruct, Self, Void, Str, Array, Ptr, \ + Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap @@ -29,10 +29,10 @@ class ListNode(PinnedStruct): # special marker PinnedSelf. # You could also set or modify ListNode.fields after the class # declaration and call ListNode.gen_fields() - ("next", Ptr(" Date: Mon, 30 Nov 2015 11:13:13 +0100 Subject: MemStruct: allow Type instance in cast --- example/jitter/memstruct.py | 6 +++--- miasm2/analysis/mem.py | 26 +++++++++++++++----------- test/analysis/mem.py | 3 +-- 3 files changed, 19 insertions(+), 16 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 5472798d..3b6358cd 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -214,10 +214,10 @@ print raw_miams = '\x00'.join('Miams') + '\x00'*3 raw_miams_array = [ord(c) for c in raw_miams] assert list(data.array)[:len(raw_miams_array)] == raw_miams_array -assert data.array.cast(Str("utf16").pinned) == memstr +assert data.array.cast(Str("utf16")) == memstr # Default is "ansi" -assert data.array.cast(Str().pinned) != memstr -assert data.array.cast(Str("utf16").pinned).val == memstr.val +assert data.array.cast(Str()) != memstr +assert data.array.cast(Str("utf16")).val == memstr.val print "See that the original array has been modified:" print repr(data) diff --git a/miasm2/analysis/mem.py b/miasm2/analysis/mem.py index 9787a25e..4d9ac712 100644 --- a/miasm2/analysis/mem.py +++ b/miasm2/analysis/mem.py @@ -946,11 +946,16 @@ class PinnedType(object): raise ValueError("byte must be a 1-lengthed str") self._vm.set_mem(self.get_addr(), byte * self.get_size()) - def cast(self, other_type, *type_args, **type_kwargs): - """Cast this PinnedType to another 
PinnedType (same address, same vm, but - different type). Return the casted PinnedType. + def cast(self, other_type): + """Cast this PinnedType to another PinnedType (same address, same vm, + but different type). Return the casted PinnedType. + + @other_type: either a Type instance (other_type.pinned is used) or a + PinnedType subclass """ - return other_type(self._vm, self.get_addr(), *type_args, **type_kwargs) + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr()) def cast_field(self, field, other_type, *type_args, **type_kwargs): """ABSTRACT: Same as cast, but the address of the returned PinnedType @@ -958,6 +963,8 @@ class PinnedType(object): @field: field specification, for example its name for a struct, or an index in an array. See the subclass doc. + @other_type: either a Type instance (other_type.pinned is used) or a + PinnedType subclass """ raise NotImplementedError("Abstract") @@ -1074,12 +1081,13 @@ class PinnedStruct(PinnedType): """ return self._type.set_field(self._vm, self.get_addr(), name, val) - def cast_field(self, field, other_type, *type_args, **type_kwargs): + def cast_field(self, field, other_type): """ @field: a field name """ - return other_type(self._vm, self.get_addr(field), - *type_args, **type_kwargs) + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr(field)) # Field generation methods, voluntarily public to be able to gen fields @@ -1277,10 +1285,6 @@ class PinnedArray(PinnedType): def __setitem__(self, idx, item): self.get_type().set_item(self._vm, self._addr, idx, item) - # just a shorthand - def as_mem_str(self, encoding="ansi"): - return self.cast(Str(encoding).pinned) - def raw(self): raise ValueError("%s is unsized, which prevents from getting its full " "raw representation. Use PinnedSizedArray instead." 
% diff --git a/test/analysis/mem.py b/test/analysis/mem.py index 8d4a56d3..90022fe9 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -426,11 +426,10 @@ assert PinnedShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast PinnedPtrVoid = Ptr("I", Void()).pinned -PinnedPtrMyStruct = Ptr("I", MyStruct).pinned p = PinnedPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct -assert p.cast(PinnedPtrMyStruct).deref == mstruct +assert p.cast(Ptr("I", MyStruct)).deref == mstruct # Field equality tests assert RawStruct("IH") == RawStruct("IH") -- cgit 1.4.1 From 31650c36e3c079445fe6c26fc0a40c1bd19da57d Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 15:25:27 +0100 Subject: MemStruct: Global doc update --- example/jitter/memstruct.py | 38 +++-- miasm2/analysis/mem.py | 386 ++++++++++++++++++++++++++------------------ test/analysis/mem.py | 4 +- 3 files changed, 254 insertions(+), 174 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 3b6358cd..77d65d17 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -13,10 +13,11 @@ from miasm2.os_dep.common import heap # Instanciate a heap my_heap = heap() # And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the PinnedStruct constructor +# explicit address passing to the PinnedType subclasses (like PinnedStruct) +# constructor set_allocator(my_heap.vm_alloc) -# Let's reimplement a simple C generic linked list mapped on a VmMngr! +# Let's reimplement a simple C generic linked list mapped on a VmMngr. 
# All the structures and methods will use the python objects but all the data # is in fact stored in the VmMngr @@ -24,14 +25,14 @@ set_allocator(my_heap.vm_alloc) class ListNode(PinnedStruct): fields = [ # The ", ),]; creates fields that correspond to + certain bits of the field; analogous to a Union of Bits (see Bits below) + - Str: a character string, with an encoding; not directly mapped to a C + type, it is a higher level notion provided for ease of use + - Void: analogous to C void, can be a placeholder in void*-style cases. + - Self: special marker to reference a Struct inside itself (FIXME: to + remove?) + +And some less common types: + + - Bits: mask only some bits of a Num + - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a + standard C type) + +For each type, the `.pinned` property returns a PinnedType subclass that +allows to access the field in memory. + + +The easiest way to use the API to declare and manipulate new structures is to +subclass PinnedStruct and define a list of (, ): # FIXME: "I" => "u32" class MyStruct(PinnedStruct): fields = [ - # Integer field: just struct.pack fields with one value + # Scalar field: just struct.pack field with one value ("num", Num("I")), ("flags", Num("B")), - # Ptr fields are Num, but they can also be dereferenced - # (self.deref_). Deref can be read and set. 
+ # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String - ("s", Ptr("I", PinnedStr)), + ("s", Ptr("I", Str())), ("i", Ptr("I", Num("I"))), ] @@ -22,8 +55,13 @@ And access the fields: mstruct = MyStruct(jitter.vm, addr) mstruct.num = 3 assert mstruct.num == 3 + mstruct.other.val = addr2 + # Also works: mstruct.other = addr2 - mstruct.deref_other = OtherStruct(jitter.vm, addr) + mstruct.other.deref = OtherStruct(jitter.vm, addr) + +PinnedUnion and PinnedBitField can also be subclassed, the `fields` field being +in the format expected by, respectively, Union and BitField. The `addr` argument can be omited if an allocator is set, in which case the structure will be automatically allocated in memory: @@ -34,46 +72,6 @@ structure will be automatically allocated in memory: Note that some structures (e.g. PinnedStr or PinnedArray) do not have a static size and cannot be allocated automatically. - - -As you saw previously, to use this module, you just have to inherit from -PinnedStruct and define a list of (, ). Available -Type classes are: - - - Num: for number (float or int) handling - - RawStruct: abstraction over a simple struct pack/unpack - - Ptr: a pointer to another PinnedType instance - - FIXME: TODEL Inline: include another PinnedStruct as a field (equivalent to having a - struct field into another struct in C) - - Array: a fixed size array of Types (points) - - Union: similar to `union` in C, list of Types at the same offset in a - structure; the union has the size of the biggest Type - - BitField: similar to C bitfields, a list of - [( as well as when used, to set and - get the pointed PinnedType. 
+ Mapped to PinnedPtr (see its doc for more info): + + assert isinstance(mystruct.ptr, PinnedPtr) + mystruct.ptr = 0x4000 # Assign the Ptr numeric value + mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value + assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value + mystruct.ptr.deref # Get the pointed PinnedType + mystruct.ptr.deref = other # Set the pointed PinnedType """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): @@ -352,6 +367,7 @@ class Ptr(Num): return self._dst_type def set(self, vm, addr, val): + """A Ptr field can be set with a PinnedPtr or an int""" if isinstance(val, PinnedType) and isinstance(val.get_type(), Ptr): self.set_val(vm, addr, val.val) else: @@ -361,9 +377,11 @@ class Ptr(Num): return self.pinned(vm, addr) def get_val(self, vm, addr): + """Get the numeric value of a Ptr""" return super(Ptr, self).get(vm, addr) def set_val(self, vm, addr, val): + """Set the numeric value of a Ptr""" return super(Ptr, self).set(vm, addr, val) def deref_get(self, vm, addr): @@ -391,7 +409,7 @@ class Ptr(Num): return PinnedPtr def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self._dst_type) + return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) def __eq__(self, other): return super(Ptr, self).__eq__(other) and \ @@ -405,24 +423,25 @@ class Ptr(Num): class Struct(Type): - """Field used to inline a PinnedType in another PinnedType. Equivalent to - having a struct field in a C struct. - - Concretely: + """Equivalent to a C struct type. Composed of a name, and a + (, ) list describing the fields + of the struct. - class MyStructClass(PinnedStruct): - fields = [("f1", Num("I")), ("f2", Num("I"))] + Mapped to PinnedStruct. - class Example(PinnedStruct): - fields = [("mystruct", Inline(MyStructClass))] + NOTE: The `.pinned` property of Struct creates classes on the fly. 
If an + equivalent structure is created by subclassing PinnedStruct, an exception + is raised to prevent creating multiple classes designating the same type. - ex = Example(vm, addr) - ex.mystruct.f2 = 3 # inlined structure field access - ex.mystruct = MyStructClass(vm, addr2) # struct copy + Example: + s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - It can be seen like a bridge to use a PinnedStruct as a Type + Toto1 = s.pinned - TODO: make the Inline implicit when setting a field to be a PinnedStruct + # This raises an exception, because it describes the same structure as + # Toto1 + class Toto(PinnedStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] """ def __init__(self, name, fields): @@ -438,18 +457,8 @@ class Struct(Type): for name, field in self._fields: # For reflexion field._set_self_type(self) - self._gen_field(name, field, offset) + self._fields_desc[name] = {"field": field, "offset": offset} offset += field.size() - self._size = offset - - def _gen_field(self, name, field, offset): - """Generate only one field - - @name: (str) the name of the field - @field: (Type instance) the field type - @offset: (int) the offset of the field in the structure - """ - self._fields_desc[name] = {"field": field, "offset": offset} @property def fields(self): @@ -463,22 +472,16 @@ class Struct(Type): return self.pinned(vm, addr) def get_field(self, vm, addr, name): - """get a field value by name. - - useless most of the time since fields are accessible via self.. - """ + """Get a field value by @name and base structure @addr in @vm VmMngr.""" if name not in self._fields_desc: - raise ValueError("'%s' type has no field '%s'" - % (self, name)) + raise ValueError("'%s' type has no field '%s'" % (self, name)) field = self.get_field_type(name) offset = self.get_offset(name) return field.get(vm, addr + offset) def set_field(self, vm, addr, name, val): - """set a field value by name. @val is the python value corresponding to - this field type. 
- - useless most of the time since fields are accessible via self.. + """Set a field value by @name and base structure @addr in @vm VmMngr. + @val is the python value corresponding to this field type. """ if name not in self._fields_desc: raise AttributeError("'%s' object has no attribute '%s'" @@ -488,9 +491,7 @@ class Struct(Type): field.set(vm, addr + offset, val) def size(self): - # Child classes can set self._size if their size is not the sum of - # their fields - return sum(a["field"].size() for a in self._fields_desc.itervalues()) + return sum(field.size() for _, field in self.fields) def get_offset(self, field_name): """ @@ -502,15 +503,14 @@ class Struct(Type): return self._fields_desc[field_name]['offset'] def get_field_type(self, name): - """return the type subclass instance describing field @name.""" - # TODO: move it to Struct + """Return the Type subclass instance describing field @name.""" return self._fields_desc[name]['field'] def _get_pinned_base_class(self): return PinnedStruct def __repr__(self): - return "Struct%s" % self.name + return "struct %s" % self.name def __eq__(self, other): return self.__class__ == other.__class__ and \ @@ -524,8 +524,13 @@ class Struct(Type): class Union(Struct): - """Allows to put multiple fields at the same offset in a PinnedStruct, similar - to unions in C. The Union will have the size of the largest of its fields. + """Represents a C union. + + Allows to put multiple fields at the same offset in a PinnedStruct, + similar to unions in C. The Union will have the size of the largest of its + fields. + + Mapped to PinnedUnion. 
Example: @@ -542,7 +547,7 @@ class Union(Struct): """ def __init__(self, field_list): - """field_list is a [(name, field)] list, see the class doc""" + """@field_list: a [(name, field)] list, see the class doc""" super(Union, self).__init__("union", field_list) def size(self): @@ -561,11 +566,18 @@ class Union(Struct): class Array(Type): - """A fixed size array (contiguous sequence) of a Type subclass - elements. Similar to something like the char[10] type in C. + """An array (contiguous sequence) of a Type subclass elements. + + Can be sized (similar to something like the char[10] type in C) or unsized + if no @array_len is given to the constructor (similar to char* used as an + array). + + Mapped to PinnedArray or PinnedSizedArray, depending on if the Array is + sized or not. Getting an array field actually returns a PinnedSizedArray. Setting it is - possible with either a list or a PinnedSizedArray instance. Examples of syntax: + possible with either a list or a PinnedSizedArray instance. Examples of + syntax: class Example(PinnedStruct): fields = [("array", Array(Num("B"), 4))] @@ -616,10 +628,14 @@ class Array(Type): "array_len instead." % self) def get_offset(self, idx): + """Returns the offset of the item at index @idx.""" return self.field_type.size() * idx def get_item(self, vm, addr, idx): - """idx can be a slice""" + """Get the item(s) at index @idx. + + @idx: int, long or slice + """ if isinstance(idx, slice): res = [] idx = self._normalize_slice(idx) @@ -630,6 +646,9 @@ class Array(Type): return self.field_type.get(vm, addr + self.get_offset(idx)) def set_item(self, vm, addr, idx, item): + """Sets one or multiple items in this array (@idx can be an int, long + or slice). 
+ """ if isinstance(idx, slice): idx = self._normalize_slice(idx) if len(item) != len(xrange(idx.start, idx.stop, idx.step)): @@ -641,6 +660,9 @@ class Array(Type): self.field_type.set(vm, addr + self.get_offset(idx), item) def is_sized(self): + """True if this is a sized array (non None self.array_len), False + otherwise. + """ return self.array_len is not None def _normalize_idx(self, idx): @@ -669,7 +691,7 @@ class Array(Type): return PinnedArray def __repr__(self): - return "%r[%s]" % (self.field_type, self.array_len) + return "%r[%s]" % (self.field_type, self.array_len or "unsized") def __eq__(self, other): return self.__class__ == other.__class__ and \ @@ -752,6 +774,8 @@ class BitField(Union): endian int, little endian short...). Can be seen (and implemented) as a Union of Bits fields. + Mapped to PinnedBitField. + Creates fields that allow to access the bitfield fields easily. Example: class Example(PinnedStruct): @@ -787,6 +811,9 @@ class BitField(Union): def set(self, vm, addr, val): self._num.set(vm, addr, val) + def _get_pinned_base_class(self): + return PinnedBitField + def __eq__(self, other): return self.__class__ == other.__class__ and \ self._num == other._num and super(BitField, self).__eq__(other) @@ -794,8 +821,23 @@ class BitField(Union): def __hash__(self): return hash((super(BitField, self).__hash__(), self._num)) + def __repr__(self): + fields_repr = ', '.join("%s: %r" % (name, field.bit_size) + for name, field in self.fields) + return "%s(%s)" % (self.__class__.__name__, fields_repr) + class Str(Type): + """A string type that handles encoding. This type is unsized (no static + size). + + The @encoding is passed to the constructor, and is currently either null + terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware + that the utf16 implementation is a bit buggy... + + Mapped to PinnedStr. 
+ """ + def __init__(self, encoding="ansi"): # TODO: encoding as lambda if encoding not in ["ansi", "utf16"]: @@ -828,6 +870,7 @@ class Str(Type): @property def enc(self): + """This Str's encoding name (as a str).""" return self._enc def _get_pinned_base_class(self): @@ -844,7 +887,10 @@ class Str(Type): class Void(Type): - """Represents the C void type.""" + """Represents the C void type. + + Mapped to PinnedVoid. + """ def _build_pinned_type(self): return PinnedVoid @@ -855,7 +901,20 @@ class Void(Type): def __hash__(self): return hash(self.__class__) + class Self(Void): + """Special marker to reference a type inside itself. + + Mapped to PinnedSelf. + + Example: + class ListNode(PinnedStruct): + fields = [ + ("next", Ptr("). Deref can be read and set. + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object ("other", Ptr("I", OtherStruct)), - ("i", Ptr("I", Num("I"))), # Ptr to a variable length String - ("s", Ptr("I", PinnedStr)), + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), ] mstruct = MyStruct(vm, addr) @@ -1046,11 +1122,17 @@ class PinnedStruct(PinnedType): other = OtherStruct(vm, addr2) mstruct.other = other.get_addr() - assert mstruct.other == other.get_addr() - assert mstruct.deref_other == other - assert mstruct.deref_other.foo == 0x1234 + assert mstruct.other.val == other.get_addr() + assert mstruct.other.deref == other + assert mstruct.other.deref.foo == 0x1234 - See the various Type doc for more information. + Note that: + MyStruct = Struct("MyStruct", ).pinned + is equivalent to the previous MyStruct declaration. + + See the various Type-s doc for more information. See PinnedStruct.gen_fields + doc for more information on how to handle recursive types and cyclic + dependencies. """ __metaclass__ = _MetaPinnedStruct fields = None @@ -1067,14 +1149,14 @@ class PinnedStruct(PinnedType): return self._addr + offset def get_field(self, name): - """get a field value by name. 
+ """Get a field value by name. useless most of the time since fields are accessible via self.. """ return self._type.get_field(self._vm, self.get_addr(), name) def set_field(self, name, val): - """set a field value by name. @val is the python value corresponding to + """Set a field value by name. @val is the python value corresponding to this field type. useless most of the time since fields are accessible via self.. @@ -1082,17 +1164,13 @@ class PinnedStruct(PinnedType): return self._type.set_field(self._vm, self.get_addr(), name, val) def cast_field(self, field, other_type): - """ - @field: a field name - """ + """In this implementation, @field is a field name""" if isinstance(other_type, Type): other_type = other_type.pinned return other_type(self._vm, self.get_addr(field)) - - # Field generation methods, voluntarily public to be able to gen fields + # Field generation method, voluntarily public to be able to gen fields # after class definition - @classmethod def gen_fields(cls, fields=None): """Generate the fields of this class (so that they can be accessed with @@ -1165,21 +1243,22 @@ class PinnedStruct(PinnedType): class PinnedUnion(PinnedStruct): + """Same as PinnedStruct but all fields have a 0 offset in the struct.""" @classmethod def _gen_type(cls, fields): return Union(fields) +class PinnedBitField(PinnedUnion): + """PinnedUnion of Bits(...) fields.""" + @classmethod + def _gen_type(cls, fields): + return BitField(fields) + + class PinnedSelf(PinnedStruct): """Special Marker class for reference to current class in a Ptr or Array - (mostly Array of Ptr). 
- - Example: - class ListNode(PinnedStruct): - fields = [ - ("next", Ptr(" Date: Mon, 30 Nov 2015 15:48:09 +0100 Subject: MemStruct: Pinned* renamed back to Mem* --- example/jitter/memstruct.py | 22 ++-- miasm2/analysis/mem.py | 250 ++++++++++++++++++++++---------------------- test/analysis/mem.py | 60 +++++------ 3 files changed, 166 insertions(+), 166 deletions(-) (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py index 77d65d17..4ddbea86 100644 --- a/example/jitter/memstruct.py +++ b/example/jitter/memstruct.py @@ -6,14 +6,14 @@ as well. """ from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import PinnedStruct, Self, Void, Str, Array, Ptr, \ +from miasm2.analysis.mem import MemStruct, Self, Void, Str, Array, Ptr, \ Num, Array, set_allocator from miasm2.os_dep.common import heap # Instanciate a heap my_heap = heap() # And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the PinnedType subclasses (like PinnedStruct) +# explicit address passing to the MemType subclasses (like MemStruct) # constructor set_allocator(my_heap.vm_alloc) @@ -22,7 +22,7 @@ set_allocator(my_heap.vm_alloc) # All the structures and methods will use the python objects but all the data # is in fact stored in the VmMngr -class ListNode(PinnedStruct): +class ListNode(MemStruct): fields = [ # The ", ): +subclass MemStruct and define a list of (, ): # FIXME: "I" => "u32" - class MyStruct(PinnedStruct): + class MyStruct(MemStruct): fields = [ # Scalar field: just struct.pack field with one value ("num", Num("I")), @@ -60,7 +60,7 @@ And access the fields: mstruct.other = addr2 mstruct.other.deref = OtherStruct(jitter.vm, addr) -PinnedUnion and PinnedBitField can also be subclassed, the `fields` field being +MemUnion and MemBitField can also be subclassed, the `fields` field being in the format expected by, respectively, Union and BitField. 
The `addr` argument can be omited if an allocator is set, in which case the @@ -70,7 +70,7 @@ structure will be automatically allocated in memory: # the allocator is a func(VmMngr) -> integer_address set_allocator(my_heap) -Note that some structures (e.g. PinnedStr or PinnedArray) do not have a static +Note that some structures (e.g. MemStr or MemArray) do not have a static size and cannot be allocated automatically. """ @@ -84,15 +84,15 @@ log.addHandler(console_handler) log.setLevel(logging.WARN) # ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a PinnedType class attribute +# TODO: as a MemType class attribute ALLOCATOR = None -# Cache for dynamically generated PinnedTypes +# Cache for dynamically generated MemTypes DYN_MEM_STRUCT_CACHE = {} def set_allocator(alloc_func): """Set an allocator for this module; allows to instanciate statically sized - PinnedTypes (i.e. sizeof() is implemented) without specifying the address + MemTypes (i.e. sizeof() is implemented) without specifying the address (the object is allocated by @alloc_func in the vm. @alloc_func: func(VmMngr) -> integer_address @@ -169,11 +169,11 @@ class Type(object): """Base class to provide methods to describe a type, as well as how to set and get fields from virtual mem. - Each Type subclass is linked to a PinnedType subclass (e.g. Struct to - PinnedStruct, Ptr to PinnedPtr, etc.). + Each Type subclass is linked to a MemType subclass (e.g. Struct to + MemStruct, Ptr to MemPtr, etc.). - When nothing is specified, PinnedValue is used to access the type in memory. - PinnedValue instances have one `.val` field, setting and getting it call + When nothing is specified, MemValue is used to access the type in memory. + MemValue instances have one `.val` field, setting and getting it call the set and get of the Type. 
Subclasses can either override _pack and _unpack, or get and set if data @@ -214,7 +214,7 @@ class Type(object): """Returns a class with a (vm, addr) constructor that allows to interact with this type in memory. - @return: a PinnedType subclass. + @return: a MemType subclass. """ if self in DYN_MEM_STRUCT_CACHE: return DYN_MEM_STRUCT_CACHE[self] @@ -223,26 +223,26 @@ class Type(object): return pinned_type def _build_pinned_type(self): - """Builds the PinnedType subclass allowing to interract with this type. + """Builds the MemType subclass allowing to interract with this type. Called by self.pinned when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() - pinned_type = type("Pinned%r" % self, (pinned_base_class,), + pinned_type = type("Mem%r" % self, (pinned_base_class,), {'_type': self}) return pinned_type def _get_pinned_base_class(self): - """Return the PinnedType subclass that maps this type in memory""" - return PinnedValue + """Return the MemType subclass that maps this type in memory""" + return MemValue def _get_self_type(self): """Used for the Self trick.""" return self._self_type def _set_self_type(self, self_type): - """If this field refers to PinnedSelf/Self, replace it with @self_type - (a PinnedType subclass) when using it. Generally not used outside this + """If this field refers to MemSelf/Self, replace it with @self_type + (a MemType subclass) when using it. Generally not used outside this module. """ self._self_type = self_type @@ -306,45 +306,45 @@ class Num(RawStruct): class Ptr(Num): """Special case of number of which value indicates the address of a - PinnedType. + MemType. 
- Mapped to PinnedPtr (see its doc for more info): + Mapped to MemPtr (see its doc for more info): - assert isinstance(mystruct.ptr, PinnedPtr) + assert isinstance(mystruct.ptr, MemPtr) mystruct.ptr = 0x4000 # Assign the Ptr numeric value mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value - mystruct.ptr.deref # Get the pointed PinnedType - mystruct.ptr.deref = other # Set the pointed PinnedType + mystruct.ptr.deref # Get the pointed MemType + mystruct.ptr.deref = other # Set the pointed MemType """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): """ @fmt: (str) Num compatible format that will be the Ptr representation in memory - @dst_type: (PinnedType or Type) the PinnedType this Ptr points to. - If a Type is given, it is transformed into a PinnedType with + @dst_type: (MemType or Type) the MemType this Ptr points to. + If a Type is given, it is transformed into a MemType with TheType.pinned. *type_args, **type_kwargs: arguments to pass to the the pointed - PinnedType when instanciating it (e.g. for PinnedStr encoding or - PinnedArray field_type). + MemType when instanciating it (e.g. for MemStr encoding or + MemArray field_type). 
""" if (not isinstance(dst_type, Type) and not (isinstance(dst_type, type) and - issubclass(dst_type, PinnedType)) and - not dst_type == PinnedSelf): - raise ValueError("dst_type of Ptr must be a PinnedType type, a " - "Type instance, the PinnedSelf marker or a class " + issubclass(dst_type, MemType)) and + not dst_type == MemSelf): + raise ValueError("dst_type of Ptr must be a MemType type, a " + "Type instance, the MemSelf marker or a class " "name.") super(Ptr, self).__init__(fmt) if isinstance(dst_type, Type): - # Patch the field to propagate the PinnedSelf replacement + # Patch the field to propagate the MemSelf replacement dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling # dst_type.pinned, which will only return a type that does not point - # on PinnedSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaPinnedStruct. + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. 
# In short, dst_type = dst_type.pinned is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type @@ -352,23 +352,23 @@ class Ptr(Num): self._type_kwargs = type_kwargs def _fix_dst_type(self): - if self._dst_type == PinnedSelf: + if self._dst_type == MemSelf: if self._get_self_type() is not None: self._dst_type = self._get_self_type() else: - raise ValueError("Unsupported usecase for PinnedSelf, sorry") + raise ValueError("Unsupported usecase for MemSelf, sorry") if isinstance(self._dst_type, Type): self._dst_type = self._dst_type.pinned @property def dst_type(self): - """Return the type (PinnedType subtype) this Ptr points to.""" + """Return the type (MemType subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type def set(self, vm, addr, val): - """A Ptr field can be set with a PinnedPtr or an int""" - if isinstance(val, PinnedType) and isinstance(val.get_type(), Ptr): + """A Ptr field can be set with a MemPtr or an int""" + if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): self.set_val(vm, addr, val.val) else: super(Ptr, self).set(vm, addr, val) @@ -393,7 +393,7 @@ class Ptr(Num): *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): - """Serializes the @val PinnedType subclass instance in @vm (VmMngr) at + """Serializes the @val MemType subclass instance in @vm (VmMngr) at @addr. Equivalent to a pointer dereference assignment in C. """ # Sanity check @@ -406,7 +406,7 @@ class Ptr(Num): vm.set_mem(dst_addr, str(val)) def _get_pinned_base_class(self): - return PinnedPtr + return MemPtr def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) @@ -427,10 +427,10 @@ class Struct(Type): (, ) list describing the fields of the struct. - Mapped to PinnedStruct. + Mapped to MemStruct. NOTE: The `.pinned` property of Struct creates classes on the fly. 
If an - equivalent structure is created by subclassing PinnedStruct, an exception + equivalent structure is created by subclassing MemStruct, an exception is raised to prevent creating multiple classes designating the same type. Example: @@ -440,7 +440,7 @@ class Struct(Type): # This raises an exception, because it describes the same structure as # Toto1 - class Toto(PinnedStruct): + class Toto(MemStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] """ @@ -507,7 +507,7 @@ class Struct(Type): return self._fields_desc[name]['field'] def _get_pinned_base_class(self): - return PinnedStruct + return MemStruct def __repr__(self): return "struct %s" % self.name @@ -526,15 +526,15 @@ class Struct(Type): class Union(Struct): """Represents a C union. - Allows to put multiple fields at the same offset in a PinnedStruct, + Allows to put multiple fields at the same offset in a MemStruct, similar to unions in C. The Union will have the size of the largest of its fields. - Mapped to PinnedUnion. + Mapped to MemUnion. Example: - class Example(PinnedStruct): + class Example(MemStruct): fields = [("uni", Union([ ("f1", Num(").pinned is equivalent to the previous MyStruct declaration. - See the various Type-s doc for more information. See PinnedStruct.gen_fields + See the various Type-s doc for more information. See MemStruct.gen_fields doc for more information on how to handle recursive types and cyclic dependencies. """ - __metaclass__ = _MetaPinnedStruct + __metaclass__ = _MetaMemStruct fields = None def get_addr(self, field_name=None): @@ -1179,18 +1179,18 @@ class PinnedStruct(PinnedType): Useful in case of a type cyclic dependency. 
For example, the following is not possible in python: - class A(PinnedStruct): + class A(MemStruct): fields = [("b", Ptr("I", B))] - class B(PinnedStruct): + class B(MemStruct): fields = [("a", Ptr("I", A))] With gen_fields, the following is the legal equivalent: - class A(PinnedStruct): + class A(MemStruct): pass - class B(PinnedStruct): + class B(MemStruct): fields = [("a", Ptr("I", A))] A.gen_fields([("b", Ptr("I", B))]) @@ -1204,13 +1204,13 @@ class PinnedStruct(PinnedType): if cls._type is None: if cls.fields is None: - raise ValueError("Cannot create a PinnedStruct subclass without" + raise ValueError("Cannot create a MemStruct subclass without" " a cls._type or a cls.fields") cls._type = cls._gen_type(cls.fields) if cls._type in DYN_MEM_STRUCT_CACHE: # FIXME: Maybe a warning would be better? - raise RuntimeError("Another PinnedType has the same type as this " + raise RuntimeError("Another MemType has the same type as this " "one. Use it instead.") # Register this class so that another one will not be created when @@ -1242,21 +1242,21 @@ class PinnedStruct(PinnedType): return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) -class PinnedUnion(PinnedStruct): - """Same as PinnedStruct but all fields have a 0 offset in the struct.""" +class MemUnion(MemStruct): + """Same as MemStruct but all fields have a 0 offset in the struct.""" @classmethod def _gen_type(cls, fields): return Union(fields) -class PinnedBitField(PinnedUnion): - """PinnedUnion of Bits(...) fields.""" +class MemBitField(MemUnion): + """MemUnion of Bits(...) fields.""" @classmethod def _gen_type(cls, fields): return BitField(fields) -class PinnedSelf(PinnedStruct): +class MemSelf(MemStruct): """Special Marker class for reference to current class in a Ptr or Array (mostly Array of Ptr). See Self doc. """ @@ -1264,7 +1264,7 @@ class PinnedSelf(PinnedStruct): return self.__class__.__name__ -class PinnedVoid(PinnedType): +class MemVoid(MemType): """Placeholder for e.g. 
Ptr to an undetermined type. Useful mostly when casted to another type. Allows to implement C's "void*" pattern. """ @@ -1274,8 +1274,8 @@ class PinnedVoid(PinnedType): return self.__class__.__name__ -class PinnedPtr(PinnedValue): - """Pinned version of a Ptr, provides two properties: +class MemPtr(MemValue): + """Mem version of a Ptr, provides two properties: - val, to set and get the numeric value of the Ptr - deref, to set and get the pointed type """ @@ -1299,7 +1299,7 @@ class PinnedPtr(PinnedValue): return "*%s" % hex(self.val) -class PinnedStr(PinnedValue): +class MemStr(MemValue): """Implements a string representation in memory. The string value can be got or set (with python str/unicode) through the @@ -1330,7 +1330,7 @@ class PinnedStr(PinnedValue): return "%r: %r" % (self.__class__, self.val) -class PinnedArray(PinnedType): +class MemArray(MemType): """An unsized array of type @field_type (a Type subclass instance). This class has no static or dynamic size. @@ -1345,7 +1345,7 @@ class PinnedArray(PinnedType): @property def field_type(self): """Return the Type subclass instance that represents the type of - this PinnedArray items. + this MemArray items. """ return self.get_type().field_type @@ -1360,15 +1360,15 @@ class PinnedArray(PinnedType): def raw(self): raise ValueError("%s is unsized, which prevents from getting its full " - "raw representation. Use PinnedSizedArray instead." % + "raw representation. Use MemSizedArray instead." % self.__class__) def __repr__(self): return "[%r, ...] [%r]" % (self[0], self.field_type) -class PinnedSizedArray(PinnedArray): - """A fixed size PinnedArray. +class MemSizedArray(MemArray): + """A fixed size MemArray. This type is dynamically sized. Generate a fixed @field_type and @array_len array which has a static size by using Array(type, size).pinned. 
diff --git a/test/analysis/mem.py b/test/analysis/mem.py index b6664cd2..6c7fc9e3 100644 --- a/test/analysis/mem.py +++ b/test/analysis/mem.py @@ -5,20 +5,20 @@ import struct from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import PinnedStruct, Num, Ptr, Str, \ +from miasm2.analysis.mem import MemStruct, Num, Ptr, Str, \ Array, RawStruct, Union, \ BitField, Self, Void, Bits, \ - set_allocator, PinnedUnion, Struct + set_allocator, MemUnion, Struct from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.os_dep.common import heap # Two structures with some fields -class OtherStruct(PinnedStruct): +class OtherStruct(MemStruct): fields = [ ("foo", Num("H")), ] -class MyStruct(PinnedStruct): +class MyStruct(MemStruct): fields = [ # Number field: just struct.pack fields with one value ("num", Num("I")), @@ -43,7 +43,7 @@ addr_str3 = 0x1300 jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) -# PinnedStruct tests +# MemStruct tests ## Creation # Use manual allocation with explicit addr for the first example mstruct = MyStruct(jitter.vm, addr) @@ -57,7 +57,7 @@ assert mstruct.num == 3 memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] assert memval == 3 -## Pinnedset sets the whole structure +## Memset sets the whole structure mstruct.memset() assert mstruct.num == 0 assert mstruct.flags == 0 @@ -105,7 +105,7 @@ assert other2.foo == 0xbeef assert other.get_addr() != other2.get_addr() # Not the same address assert other == other2 # But same value -## Same stuff for Ptr to PinnedField +## Same stuff for Ptr to MemField alloc_addr = my_heap.vm_alloc(jitter.vm, mstruct.get_type().get_field_type("i") .dst_type.sizeof()) @@ -148,7 +148,7 @@ memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) memstr3.val = "That's all folks!" 
assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Pinned representation is different +assert str(memstr3) != str(memstr) # Mem representation is different assert memstr3 != memstr # Encoding is different, so they are not eq assert memstr3.val == memstr.val # But the python value is the same @@ -204,7 +204,7 @@ assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) # Atypical fields (RawStruct and Array) -class MyStruct2(PinnedStruct): +class MyStruct2(MemStruct): fields = [ ("s1", RawStruct("=BI")), ("s2", Array(Num("B"), 10)), @@ -236,7 +236,7 @@ ms2.s2 = [1] * 10 for val in ms2.s2: assert val == 1 -### Field assignment (PinnedSizedArray) +### Field assignment (MemSizedArray) array2 = Array(Num("B"), 10).pinned(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: @@ -246,14 +246,14 @@ for val in ms2.s2: assert val == 2 -# Inlining a PinnedType tests -class InStruct(PinnedStruct): +# Inlining a MemType tests +class InStruct(MemStruct): fields = [ ("foo", Num("B")), ("bar", Num("B")), ] -class ContStruct(PinnedStruct): +class ContStruct(MemStruct): fields = [ ("one", Num("B")), ("instruct", InStruct.get_type()), @@ -286,7 +286,7 @@ assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' # Union test -class UniStruct(PinnedStruct): +class UniStruct(MemStruct): fields = [ ("one", Num("B")), ("union", Union([ @@ -312,7 +312,7 @@ assert uni.union.instruct.bar == 0x22 # BitField test -class BitStruct(PinnedUnion): +class BitStruct(MemUnion): fields = [ ("flags_num", Num("H")), ("flags", BitField(Num("H"), [ @@ -346,7 +346,7 @@ assert bit.flags.f4_1 == 1 # Unhealthy ideas -class UnhealthyIdeas(PinnedStruct): +class UnhealthyIdeas(MemStruct): fields = [ ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), ("apstr", Array(Ptr("I", Str()), 10)), @@ -387,10 +387,10 @@ assert ideas.pppself.deref.deref.deref == ideas # Circular 
dependencies -class A(PinnedStruct): +class A(MemStruct): pass -class B(PinnedStruct): +class B(MemStruct): fields = [("a", Ptr("I", A)),] # Gen A's fields after declaration @@ -405,30 +405,30 @@ assert b.a.deref == a # Cast tests -# PinnedStruct cast -PinnedInt = Num("I").pinned -PinnedShort = Num("H").pinned -dword = PinnedInt(jitter.vm) +# MemStruct cast +MemInt = Num("I").pinned +MemShort = Num("H").pinned +dword = MemInt(jitter.vm) dword.val = 0x12345678 -assert isinstance(dword.cast(PinnedShort), PinnedShort) -assert dword.cast(PinnedShort).val == 0x5678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).val == 0x5678 # Field cast ms2.s2[0] = 0x34 ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", PinnedShort).val == 0x1234 +assert ms2.cast_field("s2", MemShort).val == 0x1234 # Other method -assert PinnedShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 +assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 # Manual cast inside an Array ms2.s2[4] = 0xcd ms2.s2[5] = 0xab -assert PinnedShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd +assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast -PinnedPtrVoid = Ptr("I", Void()).pinned -p = PinnedPtrVoid(jitter.vm) +MemPtrVoid = Ptr("I", Void()).pinned +p = MemPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct assert p.cast(Ptr("I", MyStruct)).deref == mstruct @@ -474,7 +474,7 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick PinnedField.pinned/PinnedField hash test +# Quick MemField.pinned/MemField hash test assert Num("f").pinned(jitter.vm, addr) == Num("f").pinned(jitter.vm, addr) # Types are cached assert Num("f").pinned == Num("f").pinned -- cgit 1.4.1 From ba2df16277d7d4deae118ed11e1e92cd478045ec Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 16:00:26 +0100 Subject: MemStruct/Types: Renamed analysis.mem to 
core.types --- example/jitter/memstruct.py | 234 -------- example/jitter/types.py | 234 ++++++++ miasm2/analysis/mem.py | 1399 ------------------------------------------- miasm2/core/types.py | 1399 +++++++++++++++++++++++++++++++++++++++++++ test/analysis/mem.py | 506 ---------------- test/core/types.py | 506 ++++++++++++++++ test/test_all.py | 5 +- 7 files changed, 2142 insertions(+), 2141 deletions(-) delete mode 100644 example/jitter/memstruct.py create mode 100644 example/jitter/types.py delete mode 100644 miasm2/analysis/mem.py create mode 100644 miasm2/core/types.py delete mode 100644 test/analysis/mem.py create mode 100644 test/core/types.py (limited to 'test') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py deleted file mode 100644 index 4ddbea86..00000000 --- a/example/jitter/memstruct.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python -"""This script is just a short example of common usages for miasm2.analysis.mem. -For a more complete view of what is possible, tests/analysis/mem.py covers -most of the module possibilities, and the module doc gives useful information -as well. -""" - -from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, Self, Void, Str, Array, Ptr, \ - Num, Array, set_allocator -from miasm2.os_dep.common import heap - -# Instanciate a heap -my_heap = heap() -# And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the MemType subclasses (like MemStruct) -# constructor -set_allocator(my_heap.vm_alloc) - -# Let's reimplement a simple C generic linked list mapped on a VmMngr. 
- -# All the structures and methods will use the python objects but all the data -# is in fact stored in the VmMngr - -class ListNode(MemStruct): - fields = [ - # The ", ),]; creates fields that correspond to - certain bits of the field; analogous to a Union of Bits (see Bits below) - - Str: a character string, with an encoding; not directly mapped to a C - type, it is a higher level notion provided for ease of use - - Void: analogous to C void, can be a placeholder in void*-style cases. - - Self: special marker to reference a Struct inside itself (FIXME: to - remove?) - -And some less common types: - - - Bits: mask only some bits of a Num - - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a - standard C type) - -For each type, the `.pinned` property returns a MemType subclass that -allows to access the field in memory. - - -The easiest way to use the API to declare and manipulate new structures is to -subclass MemStruct and define a list of (, ): - - # FIXME: "I" => "u32" - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -And access the fields: - - mstruct = MyStruct(jitter.vm, addr) - mstruct.num = 3 - assert mstruct.num == 3 - mstruct.other.val = addr2 - # Also works: - mstruct.other = addr2 - mstruct.other.deref = OtherStruct(jitter.vm, addr) - -MemUnion and MemBitField can also be subclassed, the `fields` field being -in the format expected by, respectively, Union and BitField. 
- -The `addr` argument can be omited if an allocator is set, in which case the -structure will be automatically allocated in memory: - - my_heap = miasm2.os_dep.common.heap() - # the allocator is a func(VmMngr) -> integer_address - set_allocator(my_heap) - -Note that some structures (e.g. MemStr or MemArray) do not have a static -size and cannot be allocated automatically. -""" - -import logging -import struct - -log = logging.getLogger(__name__) -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - -# ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a MemType class attribute -ALLOCATOR = None - -# Cache for dynamically generated MemTypes -DYN_MEM_STRUCT_CACHE = {} - -def set_allocator(alloc_func): - """Set an allocator for this module; allows to instanciate statically sized - MemTypes (i.e. sizeof() is implemented) without specifying the address - (the object is allocated by @alloc_func in the vm. - - @alloc_func: func(VmMngr) -> integer_address - """ - global ALLOCATOR - ALLOCATOR = alloc_func - - -# Helpers - -def indent(s, size=4): - """Indent a string with @size spaces""" - return ' '*size + ('\n' + ' '*size).join(s.split('\n')) - - -# FIXME: copied from miasm2.os_dep.common and fixed -def get_str_ansi(vm, addr, max_char=None): - """Get a null terminated ANSI encoded string from a VmMngr. - - @vm: VmMngr instance - @max_char: max number of characters to get in memory - """ - l = 0 - tmp = addr - while ((max_char is None or l < max_char) and - vm.get_mem(tmp, 1) != "\x00"): - tmp += 1 - l += 1 - return vm.get_mem(addr, l).decode("latin1") - - -# TODO: get_raw_str_utf16 for length calculus -def get_str_utf16(vm, addr, max_char=None): - """Get a (double) null terminated utf16 little endian encoded string from - a VmMngr. This encoding is mainly used in Windows. 
- - FIXME: the implementation do not work with codepoints that are encoded on - more than 2 bytes in utf16. - - @vm: VmMngr instance - @max_char: max number of bytes to get in memory - """ - l = 0 - tmp = addr - # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? - while ((max_char is None or l < max_char) and - vm.get_mem(tmp, 2) != "\x00\x00"): - tmp += 2 - l += 2 - s = vm.get_mem(addr, l) - return s.decode('utf-16le') - - -def set_str_ansi(vm, addr, s): - """Encode a string to null terminated ascii/ansi and set it in a VmMngr - memory. - - @vm: VmMngr instance - @addr: start address to serialize the string to - s: the str to serialize - """ - vm.set_mem(addr, s + "\x00") - - -def set_str_utf16(vm, addr, s): - """Same as set_str_ansi with (double) null terminated utf16 encoding.""" - s = (s + '\x00').encode('utf-16le') - vm.set_mem(addr, s) - - -# Type classes - -class Type(object): - """Base class to provide methods to describe a type, as well as how to set - and get fields from virtual mem. - - Each Type subclass is linked to a MemType subclass (e.g. Struct to - MemStruct, Ptr to MemPtr, etc.). - - When nothing is specified, MemValue is used to access the type in memory. - MemValue instances have one `.val` field, setting and getting it call - the set and get of the Type. - - Subclasses can either override _pack and _unpack, or get and set if data - serialization requires more work (see Struct implementation for an example). - - TODO: move any trace of vm and addr out of these classes? - """ - - _self_type = None - - def _pack(self, val): - """Serializes the python value @val to a raw str""" - raise NotImplementedError() - - def _unpack(self, raw_str): - """Deserializes a raw str to an object representing the python value - of this field. - """ - raise NotImplementedError() - - def set(self, vm, addr, val): - """Set a VmMngr memory from a value. 
- - @vm: VmMngr instance - @addr: the start adress in memory to set - @val: the python value to serialize in @vm at @addr - """ - raw = self._pack(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) - return self._unpack(raw) - - @property - def pinned(self): - """Returns a class with a (vm, addr) constructor that allows to - interact with this type in memory. - - @return: a MemType subclass. - """ - if self in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[self] - pinned_type = self._build_pinned_type() - DYN_MEM_STRUCT_CACHE[self] = pinned_type - return pinned_type - - def _build_pinned_type(self): - """Builds the MemType subclass allowing to interract with this type. - - Called by self.pinned when it is not in cache. - """ - pinned_base_class = self._get_pinned_base_class() - pinned_type = type("Mem%r" % self, (pinned_base_class,), - {'_type': self}) - return pinned_type - - def _get_pinned_base_class(self): - """Return the MemType subclass that maps this type in memory""" - return MemValue - - def _get_self_type(self): - """Used for the Self trick.""" - return self._self_type - - def _set_self_type(self, self_type): - """If this field refers to MemSelf/Self, replace it with @self_type - (a MemType subclass) when using it. Generally not used outside this - module. - """ - self._self_type = self_type - - def size(self): - """Return the size in bytes of the serialized version of this field""" - raise NotImplementedError() - - def __len__(self): - return self.size() - - def __neq__(self, other): - return not self == other - - -class RawStruct(Type): - """Dumb struct.pack/unpack field. Mainly used to factorize code. - - Value is a tuple corresponding to the struct @fmt passed to the constructor. 
- """ - - def __init__(self, fmt): - self._fmt = fmt - - def _pack(self, fields): - return struct.pack(self._fmt, *fields) - - def _unpack(self, raw_str): - return struct.unpack(self._fmt, raw_str) - - def size(self): - return struct.calcsize(self._fmt) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self._fmt) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._fmt == other._fmt - - def __hash__(self): - return hash((self.__class__, self._fmt)) - - -class Num(RawStruct): - """Represents a number (integer or float). The number is encoded with - a struct-style format which must represent only one value. - - TODO: use u32, i16, etc. for format. - """ - - def _pack(self, number): - return super(Num, self)._pack([number]) - - def _unpack(self, raw_str): - upck = super(Num, self)._unpack(raw_str) - if len(upck) != 1: - raise ValueError("Num format string unpacks to multiple values, " - "should be 1") - return upck[0] - - -class Ptr(Num): - """Special case of number of which value indicates the address of a - MemType. - - Mapped to MemPtr (see its doc for more info): - - assert isinstance(mystruct.ptr, MemPtr) - mystruct.ptr = 0x4000 # Assign the Ptr numeric value - mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value - assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value - mystruct.ptr.deref # Get the pointed MemType - mystruct.ptr.deref = other # Set the pointed MemType - """ - - def __init__(self, fmt, dst_type, *type_args, **type_kwargs): - """ - @fmt: (str) Num compatible format that will be the Ptr representation - in memory - @dst_type: (MemType or Type) the MemType this Ptr points to. - If a Type is given, it is transformed into a MemType with - TheType.pinned. - *type_args, **type_kwargs: arguments to pass to the the pointed - MemType when instanciating it (e.g. for MemStr encoding or - MemArray field_type). 
- """ - if (not isinstance(dst_type, Type) and - not (isinstance(dst_type, type) and - issubclass(dst_type, MemType)) and - not dst_type == MemSelf): - raise ValueError("dst_type of Ptr must be a MemType type, a " - "Type instance, the MemSelf marker or a class " - "name.") - super(Ptr, self).__init__(fmt) - if isinstance(dst_type, Type): - # Patch the field to propagate the MemSelf replacement - dst_type._get_self_type = lambda: self._get_self_type() - # dst_type cannot be patched here, since _get_self_type of the outer - # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point - # on MemSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.pinned is not valid here, it is done - # lazily in _fix_dst_type - self._dst_type = dst_type - self._type_args = type_args - self._type_kwargs = type_kwargs - - def _fix_dst_type(self): - if self._dst_type == MemSelf: - if self._get_self_type() is not None: - self._dst_type = self._get_self_type() - else: - raise ValueError("Unsupported usecase for MemSelf, sorry") - if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned - - @property - def dst_type(self): - """Return the type (MemType subtype) this Ptr points to.""" - self._fix_dst_type() - return self._dst_type - - def set(self, vm, addr, val): - """A Ptr field can be set with a MemPtr or an int""" - if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): - self.set_val(vm, addr, val.val) - else: - super(Ptr, self).set(vm, addr, val) - - def get(self, vm, addr): - return self.pinned(vm, addr) - - def get_val(self, vm, addr): - """Get the numeric value of a Ptr""" - return super(Ptr, self).get(vm, addr) - - def set_val(self, vm, addr, val): - """Set the numeric value of a Ptr""" - return super(Ptr, self).set(vm, addr, val) - - def deref_get(self, vm, addr): - """Deserializes 
the data in @vm (VmMngr) at @addr to self.dst_type. - Equivalent to a pointer dereference rvalue in C. - """ - dst_addr = self.get_val(vm, addr) - return self.dst_type(vm, dst_addr, - *self._type_args, **self._type_kwargs) - - def deref_set(self, vm, addr, val): - """Serializes the @val MemType subclass instance in @vm (VmMngr) at - @addr. Equivalent to a pointer dereference assignment in C. - """ - # Sanity check - if self.dst_type != val.__class__: - log.warning("Original type was %s, overriden by value of type %s", - self._dst_type.__name__, val.__class__.__name__) - - # Actual job - dst_addr = self.get_val(vm, addr) - vm.set_mem(dst_addr, str(val)) - - def _get_pinned_base_class(self): - return MemPtr - - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) - - def __eq__(self, other): - return super(Ptr, self).__eq__(other) and \ - self.dst_type == other.dst_type and \ - self._type_args == other._type_args and \ - self._type_kwargs == other._type_kwargs - - def __hash__(self): - return hash((super(Ptr, self).__hash__(), self.dst_type, - self._type_args)) - - -class Struct(Type): - """Equivalent to a C struct type. Composed of a name, and a - (, ) list describing the fields - of the struct. - - Mapped to MemStruct. - - NOTE: The `.pinned` property of Struct creates classes on the fly. If an - equivalent structure is created by subclassing MemStruct, an exception - is raised to prevent creating multiple classes designating the same type. 
- - Example: - s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - - Toto1 = s.pinned - - # This raises an exception, because it describes the same structure as - # Toto1 - class Toto(MemStruct): - fields = [("f1", Num("I")), ("f2", Num("I"))] - """ - - def __init__(self, name, fields): - self.name = name - # fields is immutable - self._fields = tuple(fields) - self._gen_fields() - - def _gen_fields(self): - """Precompute useful metadata on self.fields.""" - self._fields_desc = {} - offset = 0 - for name, field in self._fields: - # For reflexion - field._set_self_type(self) - self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() - - @property - def fields(self): - return self._fields - - def set(self, vm, addr, val): - raw = str(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - return self.pinned(vm, addr) - - def get_field(self, vm, addr, name): - """Get a field value by @name and base structure @addr in @vm VmMngr.""" - if name not in self._fields_desc: - raise ValueError("'%s' type has no field '%s'" % (self, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - return field.get(vm, addr + offset) - - def set_field(self, vm, addr, name, val): - """Set a field value by @name and base structure @addr in @vm VmMngr. - @val is the python value corresponding to this field type. 
- """ - if name not in self._fields_desc: - raise AttributeError("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - field.set(vm, addr + offset, val) - - def size(self): - return sum(field.size() for _, field in self.fields) - - def get_offset(self, field_name): - """ - @field_name: (str, optional) the name of the field to get the - offset of - """ - if field_name not in self._fields_desc: - raise ValueError("This structure has no %s field" % field_name) - return self._fields_desc[field_name]['offset'] - - def get_field_type(self, name): - """Return the Type subclass instance describing field @name.""" - return self._fields_desc[name]['field'] - - def _get_pinned_base_class(self): - return MemStruct - - def __repr__(self): - return "struct %s" % self.name - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.fields == other.fields and \ - self.name == other.name - - def __hash__(self): - # Only hash name, not fields, because if a field is a Ptr to this - # Struct type, an infinite loop occurs - return hash((self.__class__, self.name)) - - -class Union(Struct): - """Represents a C union. - - Allows to put multiple fields at the same offset in a MemStruct, - similar to unions in C. The Union will have the size of the largest of its - fields. - - Mapped to MemUnion. 
- - Example: - - class Example(MemStruct): - fields = [("uni", Union([ - ("f1", Num("= self.size()): - raise IndexError("Index %s out of bounds" % idx) - - def _get_pinned_base_class(self): - if self.is_sized(): - return MemSizedArray - else: - return MemArray - - def __repr__(self): - return "%r[%s]" % (self.field_type, self.array_len or "unsized") - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.field_type == other.field_type and \ - self.array_len == other.array_len - - def __hash__(self): - return hash((self.__class__, self.field_type, self.array_len)) - - -class Bits(Type): - """Helper class for BitField, not very useful on its own. Represents some - bits of a Num. - - The @backing_num is used to know how to serialize/deserialize data in vm, - but getting/setting this fields only affects bits from @bit_offset to - @bit_offset + @bits. Masking and shifting is handled by the class, the aim - is to provide a transparent way to set and get some bits of a num. 
- """ - - def __init__(self, backing_num, bits, bit_offset): - if not isinstance(backing_num, Num): - raise ValueError("backing_num should be a Num instance") - self._num = backing_num - self._bits = bits - self._bit_offset = bit_offset - - def set(self, vm, addr, val): - val_mask = (1 << self._bits) - 1 - val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 - - full_num_mask = (1 << num_size) - 1 - num_mask = (~(val_mask << self._bit_offset)) & full_num_mask - - num_val = self._num.get(vm, addr) - res_val = (num_val & num_mask) | val_shifted - self._num.set(vm, addr, res_val) - - def get(self, vm, addr): - val_mask = (1 << self._bits) - 1 - num_val = self._num.get(vm, addr) - res_val = (num_val >> self._bit_offset) & val_mask - return res_val - - def size(self): - return self._num.size() - - @property - def bit_size(self): - """Number of bits read/written by this class""" - return self._bits - - @property - def bit_offset(self): - """Offset in bits (beginning at 0, the LSB) from which to read/write - bits. - """ - return self._bit_offset - - def __repr__(self): - return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, - self._bit_offset, self._bit_offset + self._bits) - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and self._bits == other._bits and \ - self._bit_offset == other._bit_offset - - def __hash__(self): - return hash((self.__class__, self._num, self._bits, self._bit_offset)) - - -class BitField(Union): - """A C-like bitfield. - - Constructed with a list [(, )] and a - @backing_num. The @backing_num is a Num instance that determines the total - size of the bitfield and the way the bits are serialized/deserialized (big - endian int, little endian short...). Can be seen (and implemented) as a - Union of Bits fields. - - Mapped to MemBitField. - - Creates fields that allow to access the bitfield fields easily. 
Example: - - class Example(MemStruct): - fields = [("bf", BitField(Num("B"), [ - ("f1", 2), - ("f2", 4), - ("f3", 1) - ]) - )] - - ex = Example(vm, addr) - ex.memset() - ex.f2 = 2 - ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated - assert ex.f1 == 3 - assert ex.f2 == 2 - assert ex.f3 == 0 # previously memset() - assert ex.bf == 3 + 2 << 2 - """ - - def __init__(self, backing_num, bit_list): - """@backing num: Num intance, @bit_list: [(name, n_bits)]""" - self._num = backing_num - fields = [] - offset = 0 - for name, bits in bit_list: - fields.append((name, Bits(self._num, bits, offset))) - offset += bits - if offset > self._num.size() * 8: - raise ValueError("sum of bit lengths is > to the backing num size") - super(BitField, self).__init__(fields) - - def set(self, vm, addr, val): - self._num.set(vm, addr, val) - - def _get_pinned_base_class(self): - return MemBitField - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and super(BitField, self).__eq__(other) - - def __hash__(self): - return hash((super(BitField, self).__hash__(), self._num)) - - def __repr__(self): - fields_repr = ', '.join("%s: %r" % (name, field.bit_size) - for name, field in self.fields) - return "%s(%s)" % (self.__class__.__name__, fields_repr) - - -class Str(Type): - """A string type that handles encoding. This type is unsized (no static - size). - - The @encoding is passed to the constructor, and is currently either null - terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware - that the utf16 implementation is a bit buggy... - - Mapped to MemStr. 
- """ - - def __init__(self, encoding="ansi"): - # TODO: encoding as lambda - if encoding not in ["ansi", "utf16"]: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - self._enc = encoding - - def get(self, vm, addr): - """Set the string value in memory""" - if self._enc == "ansi": - get_str = get_str_ansi - elif self._enc == "utf16": - get_str = get_str_utf16 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - return get_str(vm, addr) - - def set(self, vm, addr, s): - """Get the string value from memory""" - if self._enc == "ansi": - set_str = set_str_ansi - elif self._enc == "utf16": - set_str = set_str_utf16 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - set_str(vm, addr, s) - - def size(self): - """This type is unsized.""" - raise ValueError("Str is unsized") - - @property - def enc(self): - """This Str's encoding name (as a str).""" - return self._enc - - def _get_pinned_base_class(self): - return MemStr - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.enc) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._enc == other._enc - - def __hash__(self): - return hash((self.__class__, self._enc)) - - -class Void(Type): - """Represents the C void type. - - Mapped to MemVoid. - """ - - def _build_pinned_type(self): - return MemVoid - - def __eq__(self, other): - return self.__class__ == other.__class__ - - def __hash__(self): - return hash(self.__class__) - - -class Self(Void): - """Special marker to reference a type inside itself. - - Mapped to MemSelf. - - Example: - class ListNode(MemStruct): - fields = [ - ("next", Ptr(", ) - - instances of this class will have properties to interract with these - fields. 
- - Example: - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - - mstruct = MyStruct(vm, addr) - - # Field assignment modifies virtual memory - mstruct.num = 3 - assert mstruct.num == 3 - memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), - 4))[0] - assert memval == mstruct.num - - # Memset sets the whole structure - mstruct.memset() - assert mstruct.num == 0 - mstruct.memset('\x11') - assert mstruct.num == 0x11111111 - - other = OtherStruct(vm, addr2) - mstruct.other = other.get_addr() - assert mstruct.other.val == other.get_addr() - assert mstruct.other.deref == other - assert mstruct.other.deref.foo == 0x1234 - - Note that: - MyStruct = Struct("MyStruct", ).pinned - is equivalent to the previous MyStruct declaration. - - See the various Type-s doc for more information. See MemStruct.gen_fields - doc for more information on how to handle recursive types and cyclic - dependencies. - """ - __metaclass__ = _MetaMemStruct - fields = None - - def get_addr(self, field_name=None): - """ - @field_name: (str, optional) the name of the field to get the - address of - """ - if field_name is not None: - offset = self._type.get_offset(field_name) - else: - offset = 0 - return self._addr + offset - - def get_field(self, name): - """Get a field value by name. - - useless most of the time since fields are accessible via self.. - """ - return self._type.get_field(self._vm, self.get_addr(), name) - - def set_field(self, name, val): - """Set a field value by name. @val is the python value corresponding to - this field type. - - useless most of the time since fields are accessible via self.. 
- """ - return self._type.set_field(self._vm, self.get_addr(), name, val) - - def cast_field(self, field, other_type): - """In this implementation, @field is a field name""" - if isinstance(other_type, Type): - other_type = other_type.pinned - return other_type(self._vm, self.get_addr(field)) - - # Field generation method, voluntarily public to be able to gen fields - # after class definition - @classmethod - def gen_fields(cls, fields=None): - """Generate the fields of this class (so that they can be accessed with - self.) from a @fields list, as described in the class doc. - - Useful in case of a type cyclic dependency. For example, the following - is not possible in python: - - class A(MemStruct): - fields = [("b", Ptr("I", B))] - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - With gen_fields, the following is the legal equivalent: - - class A(MemStruct): - pass - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - A.gen_fields([("b", Ptr("I", B))]) - """ - if fields is not None: - if cls.fields is not None: - raise ValueError("Cannot regen fields of a class. Setting " - "cls.fields at class definition and calling " - "gen_fields are mutually exclusive.") - cls.fields = fields - - if cls._type is None: - if cls.fields is None: - raise ValueError("Cannot create a MemStruct subclass without" - " a cls._type or a cls.fields") - cls._type = cls._gen_type(cls.fields) - - if cls._type in DYN_MEM_STRUCT_CACHE: - # FIXME: Maybe a warning would be better? - raise RuntimeError("Another MemType has the same type as this " - "one. Use it instead.") - - # Register this class so that another one will not be created when - # calling cls._type.pinned - DYN_MEM_STRUCT_CACHE[cls._type] = cls - - cls._gen_attributes() - - @classmethod - def _gen_attributes(cls): - # Generate self. 
getter and setters - for name, field in cls._type.fields: - setattr(cls, name, property( - lambda self, name=name: self.get_field(name), - lambda self, val, name=name: self.set_field(name, val) - )) - - @classmethod - def _gen_type(cls, fields): - return Struct(cls.__name__, fields) - - def __repr__(self): - out = [] - for name, field in self._type.fields: - val_repr = repr(self.get_field(name)) - if '\n' in val_repr: - val_repr = '\n' + indent(val_repr, 4) - out.append("%s: %r = %s" % (name, field, val_repr)) - return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) - - -class MemUnion(MemStruct): - """Same as MemStruct but all fields have a 0 offset in the struct.""" - @classmethod - def _gen_type(cls, fields): - return Union(fields) - - -class MemBitField(MemUnion): - """MemUnion of Bits(...) fields.""" - @classmethod - def _gen_type(cls, fields): - return BitField(fields) - - -class MemSelf(MemStruct): - """Special Marker class for reference to current class in a Ptr or Array - (mostly Array of Ptr). See Self doc. - """ - def __repr__(self): - return self.__class__.__name__ - - -class MemVoid(MemType): - """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when - casted to another type. Allows to implement C's "void*" pattern. 
- """ - _type = Void() - - def __repr__(self): - return self.__class__.__name__ - - -class MemPtr(MemValue): - """Mem version of a Ptr, provides two properties: - - val, to set and get the numeric value of the Ptr - - deref, to set and get the pointed type - """ - @property - def val(self): - return self._type.get_val(self._vm, self._addr) - - @val.setter - def val(self, value): - return self._type.set_val(self._vm, self._addr, value) - - @property - def deref(self): - return self._type.deref_get(self._vm, self._addr) - - @deref.setter - def deref(self, val): - return self._type.deref_set(self._vm, self._addr, val) - - def __repr__(self): - return "*%s" % hex(self.val) - - -class MemStr(MemValue): - """Implements a string representation in memory. - - The string value can be got or set (with python str/unicode) through the - self.val attribute. String encoding/decoding is handled by the class, - - This type is dynamically sized only (get_size is implemented, not sizeof). - """ - - def get_size(self): - """This get_size implementation is quite unsafe: it reads the string - underneath to determine the size, it may therefore read a lot of memory - and provoke mem faults (analogous to strlen). - """ - val = self.val - if self.get_type().enc == "ansi": - return len(val) + 1 - elif self.get_type().enc == "utf16": - # FIXME: real encoding... - return len(val) * 2 + 2 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - - def raw(self): - raw = self._vm.get_mem(self.get_addr(), self.get_size()) - return raw - - def __repr__(self): - return "%r: %r" % (self.__class__, self.val) - - -class MemArray(MemType): - """An unsized array of type @field_type (a Type subclass instance). - This class has no static or dynamic size. 
- - It can be indexed for setting and getting elements, example: - - array = Array(Num("I")).pinned(vm, addr)) - array[2] = 5 - array[4:8] = [0, 1, 2, 3] - print array[20] - """ - - @property - def field_type(self): - """Return the Type subclass instance that represents the type of - this MemArray items. - """ - return self.get_type().field_type - - def get_addr(self, idx=0): - return self._addr + self.get_type().get_offset(idx) - - def __getitem__(self, idx): - return self.get_type().get_item(self._vm, self._addr, idx) - - def __setitem__(self, idx, item): - self.get_type().set_item(self._vm, self._addr, idx, item) - - def raw(self): - raise ValueError("%s is unsized, which prevents from getting its full " - "raw representation. Use MemSizedArray instead." % - self.__class__) - - def __repr__(self): - return "[%r, ...] [%r]" % (self[0], self.field_type) - - -class MemSizedArray(MemArray): - """A fixed size MemArray. - - This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. 
- """ - - @property - def array_len(self): - """The length, in number of elements, of this array.""" - return self.get_type().array_len - - def get_size(self): - return self.get_type().size() - - def __iter__(self): - for i in xrange(self.get_type().array_len): - yield self[i] - - def raw(self): - return self._vm.get_mem(self.get_addr(), self.get_size()) - - def __repr__(self): - item_reprs = [repr(item) for item in self] - if self.array_len > 0 and '\n' in item_reprs[0]: - items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' - else: - items = ', '.join(item_reprs) - return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) - diff --git a/miasm2/core/types.py b/miasm2/core/types.py new file mode 100644 index 00000000..3c8d5b8b --- /dev/null +++ b/miasm2/core/types.py @@ -0,0 +1,1399 @@ +"""This module provides classes to manipulate C structures backed by a VmMngr +object (a miasm sandbox virtual memory). + +It provides two families of classes, Type-s (Num, Ptr, Str...) and their +associated MemType-s. A Type subclass instance represents a fully defined C +type. A MemType subclass instance represents a C LValue (or variable): it is +a type attached to the memory. Available types are: + + - Num: for number (float or int) handling + - Ptr: a pointer to another Type + - Struct: equivalent to a C struct definition + - Union: similar to union in C, list of Types at the same offset in a + structure; the union has the size of the biggest Type (~ Struct with all + the fields at offset 0) + - Array: an array of items of the same type; can have a fixed size or + not (e.g. 
char[3] vs char* used as an array in C) + - BitField: similar to C bitfields, a list of + [(, ),]; creates fields that correspond to + certain bits of the field; analogous to a Union of Bits (see Bits below) + - Str: a character string, with an encoding; not directly mapped to a C + type, it is a higher level notion provided for ease of use + - Void: analogous to C void, can be a placeholder in void*-style cases. + - Self: special marker to reference a Struct inside itself (FIXME: to + remove?) + +And some less common types: + + - Bits: mask only some bits of a Num + - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a + standard C type) + +For each type, the `.pinned` property returns a MemType subclass that +allows to access the field in memory. + + +The easiest way to use the API to declare and manipulate new structures is to +subclass MemStruct and define a list of (, ): + + # FIXME: "I" => "u32" + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + +And access the fields: + + mstruct = MyStruct(jitter.vm, addr) + mstruct.num = 3 + assert mstruct.num == 3 + mstruct.other.val = addr2 + # Also works: + mstruct.other = addr2 + mstruct.other.deref = OtherStruct(jitter.vm, addr) + +MemUnion and MemBitField can also be subclassed, the `fields` field being +in the format expected by, respectively, Union and BitField. + +The `addr` argument can be omited if an allocator is set, in which case the +structure will be automatically allocated in memory: + + my_heap = miasm2.os_dep.common.heap() + # the allocator is a func(VmMngr) -> integer_address + set_allocator(my_heap) + +Note that some structures (e.g. 
MemStr or MemArray) do not have a static +size and cannot be allocated automatically. +""" + +import logging +import struct + +log = logging.getLogger(__name__) +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# ALLOCATOR is a function(vm, size) -> allocated_address +# TODO: as a MemType class attribute +ALLOCATOR = None + +# Cache for dynamically generated MemTypes +DYN_MEM_STRUCT_CACHE = {} + +def set_allocator(alloc_func): + """Set an allocator for this module; allows to instanciate statically sized + MemTypes (i.e. sizeof() is implemented) without specifying the address + (the object is allocated by @alloc_func in the vm. + + @alloc_func: func(VmMngr) -> integer_address + """ + global ALLOCATOR + ALLOCATOR = alloc_func + + +# Helpers + +def indent(s, size=4): + """Indent a string with @size spaces""" + return ' '*size + ('\n' + ' '*size).join(s.split('\n')) + + +# FIXME: copied from miasm2.os_dep.common and fixed +def get_str_ansi(vm, addr, max_char=None): + """Get a null terminated ANSI encoded string from a VmMngr. + + @vm: VmMngr instance + @max_char: max number of characters to get in memory + """ + l = 0 + tmp = addr + while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 1) != "\x00"): + tmp += 1 + l += 1 + return vm.get_mem(addr, l).decode("latin1") + + +# TODO: get_raw_str_utf16 for length calculus +def get_str_utf16(vm, addr, max_char=None): + """Get a (double) null terminated utf16 little endian encoded string from + a VmMngr. This encoding is mainly used in Windows. + + FIXME: the implementation do not work with codepoints that are encoded on + more than 2 bytes in utf16. + + @vm: VmMngr instance + @max_char: max number of bytes to get in memory + """ + l = 0 + tmp = addr + # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? 
+ while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 2) != "\x00\x00"): + tmp += 2 + l += 2 + s = vm.get_mem(addr, l) + return s.decode('utf-16le') + + +def set_str_ansi(vm, addr, s): + """Encode a string to null terminated ascii/ansi and set it in a VmMngr + memory. + + @vm: VmMngr instance + @addr: start address to serialize the string to + s: the str to serialize + """ + vm.set_mem(addr, s + "\x00") + + +def set_str_utf16(vm, addr, s): + """Same as set_str_ansi with (double) null terminated utf16 encoding.""" + s = (s + '\x00').encode('utf-16le') + vm.set_mem(addr, s) + + +# Type classes + +class Type(object): + """Base class to provide methods to describe a type, as well as how to set + and get fields from virtual mem. + + Each Type subclass is linked to a MemType subclass (e.g. Struct to + MemStruct, Ptr to MemPtr, etc.). + + When nothing is specified, MemValue is used to access the type in memory. + MemValue instances have one `.val` field, setting and getting it call + the set and get of the Type. + + Subclasses can either override _pack and _unpack, or get and set if data + serialization requires more work (see Struct implementation for an example). + + TODO: move any trace of vm and addr out of these classes? + """ + + _self_type = None + + def _pack(self, val): + """Serializes the python value @val to a raw str""" + raise NotImplementedError() + + def _unpack(self, raw_str): + """Deserializes a raw str to an object representing the python value + of this field. + """ + raise NotImplementedError() + + def set(self, vm, addr, val): + """Set a VmMngr memory from a value. 
+ + @vm: VmMngr instance + @addr: the start adress in memory to set + @val: the python value to serialize in @vm at @addr + """ + raw = self._pack(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + """Get the python value of a field from a VmMngr memory at @addr.""" + raw = vm.get_mem(addr, self.size()) + return self._unpack(raw) + + @property + def pinned(self): + """Returns a class with a (vm, addr) constructor that allows to + interact with this type in memory. + + @return: a MemType subclass. + """ + if self in DYN_MEM_STRUCT_CACHE: + return DYN_MEM_STRUCT_CACHE[self] + pinned_type = self._build_pinned_type() + DYN_MEM_STRUCT_CACHE[self] = pinned_type + return pinned_type + + def _build_pinned_type(self): + """Builds the MemType subclass allowing to interract with this type. + + Called by self.pinned when it is not in cache. + """ + pinned_base_class = self._get_pinned_base_class() + pinned_type = type("Mem%r" % self, (pinned_base_class,), + {'_type': self}) + return pinned_type + + def _get_pinned_base_class(self): + """Return the MemType subclass that maps this type in memory""" + return MemValue + + def _get_self_type(self): + """Used for the Self trick.""" + return self._self_type + + def _set_self_type(self, self_type): + """If this field refers to MemSelf/Self, replace it with @self_type + (a MemType subclass) when using it. Generally not used outside this + module. + """ + self._self_type = self_type + + def size(self): + """Return the size in bytes of the serialized version of this field""" + raise NotImplementedError() + + def __len__(self): + return self.size() + + def __neq__(self, other): + return not self == other + + +class RawStruct(Type): + """Dumb struct.pack/unpack field. Mainly used to factorize code. + + Value is a tuple corresponding to the struct @fmt passed to the constructor. 
+ """ + + def __init__(self, fmt): + self._fmt = fmt + + def _pack(self, fields): + return struct.pack(self._fmt, *fields) + + def _unpack(self, raw_str): + return struct.unpack(self._fmt, raw_str) + + def size(self): + return struct.calcsize(self._fmt) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self._fmt) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._fmt == other._fmt + + def __hash__(self): + return hash((self.__class__, self._fmt)) + + +class Num(RawStruct): + """Represents a number (integer or float). The number is encoded with + a struct-style format which must represent only one value. + + TODO: use u32, i16, etc. for format. + """ + + def _pack(self, number): + return super(Num, self)._pack([number]) + + def _unpack(self, raw_str): + upck = super(Num, self)._unpack(raw_str) + if len(upck) != 1: + raise ValueError("Num format string unpacks to multiple values, " + "should be 1") + return upck[0] + + +class Ptr(Num): + """Special case of number of which value indicates the address of a + MemType. + + Mapped to MemPtr (see its doc for more info): + + assert isinstance(mystruct.ptr, MemPtr) + mystruct.ptr = 0x4000 # Assign the Ptr numeric value + mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value + assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value + mystruct.ptr.deref # Get the pointed MemType + mystruct.ptr.deref = other # Set the pointed MemType + """ + + def __init__(self, fmt, dst_type, *type_args, **type_kwargs): + """ + @fmt: (str) Num compatible format that will be the Ptr representation + in memory + @dst_type: (MemType or Type) the MemType this Ptr points to. + If a Type is given, it is transformed into a MemType with + TheType.pinned. + *type_args, **type_kwargs: arguments to pass to the the pointed + MemType when instanciating it (e.g. for MemStr encoding or + MemArray field_type). 
+ """ + if (not isinstance(dst_type, Type) and + not (isinstance(dst_type, type) and + issubclass(dst_type, MemType)) and + not dst_type == MemSelf): + raise ValueError("dst_type of Ptr must be a MemType type, a " + "Type instance, the MemSelf marker or a class " + "name.") + super(Ptr, self).__init__(fmt) + if isinstance(dst_type, Type): + # Patch the field to propagate the MemSelf replacement + dst_type._get_self_type = lambda: self._get_self_type() + # dst_type cannot be patched here, since _get_self_type of the outer + # class has not yet been set. Patching dst_type involves calling + # dst_type.pinned, which will only return a type that does not point + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. + # In short, dst_type = dst_type.pinned is not valid here, it is done + # lazily in _fix_dst_type + self._dst_type = dst_type + self._type_args = type_args + self._type_kwargs = type_kwargs + + def _fix_dst_type(self): + if self._dst_type == MemSelf: + if self._get_self_type() is not None: + self._dst_type = self._get_self_type() + else: + raise ValueError("Unsupported usecase for MemSelf, sorry") + if isinstance(self._dst_type, Type): + self._dst_type = self._dst_type.pinned + + @property + def dst_type(self): + """Return the type (MemType subtype) this Ptr points to.""" + self._fix_dst_type() + return self._dst_type + + def set(self, vm, addr, val): + """A Ptr field can be set with a MemPtr or an int""" + if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): + self.set_val(vm, addr, val.val) + else: + super(Ptr, self).set(vm, addr, val) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_val(self, vm, addr): + """Get the numeric value of a Ptr""" + return super(Ptr, self).get(vm, addr) + + def set_val(self, vm, addr, val): + """Set the numeric value of a Ptr""" + return super(Ptr, self).set(vm, addr, val) + + def deref_get(self, vm, addr): + """Deserializes 
the data in @vm (VmMngr) at @addr to self.dst_type. + Equivalent to a pointer dereference rvalue in C. + """ + dst_addr = self.get_val(vm, addr) + return self.dst_type(vm, dst_addr, + *self._type_args, **self._type_kwargs) + + def deref_set(self, vm, addr, val): + """Serializes the @val MemType subclass instance in @vm (VmMngr) at + @addr. Equivalent to a pointer dereference assignment in C. + """ + # Sanity check + if self.dst_type != val.__class__: + log.warning("Original type was %s, overriden by value of type %s", + self._dst_type.__name__, val.__class__.__name__) + + # Actual job + dst_addr = self.get_val(vm, addr) + vm.set_mem(dst_addr, str(val)) + + def _get_pinned_base_class(self): + return MemPtr + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) + + def __eq__(self, other): + return super(Ptr, self).__eq__(other) and \ + self.dst_type == other.dst_type and \ + self._type_args == other._type_args and \ + self._type_kwargs == other._type_kwargs + + def __hash__(self): + return hash((super(Ptr, self).__hash__(), self.dst_type, + self._type_args)) + + +class Struct(Type): + """Equivalent to a C struct type. Composed of a name, and a + (, ) list describing the fields + of the struct. + + Mapped to MemStruct. + + NOTE: The `.pinned` property of Struct creates classes on the fly. If an + equivalent structure is created by subclassing MemStruct, an exception + is raised to prevent creating multiple classes designating the same type. 
+ + Example: + s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) + + Toto1 = s.pinned + + # This raises an exception, because it describes the same structure as + # Toto1 + class Toto(MemStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] + """ + + def __init__(self, name, fields): + self.name = name + # fields is immutable + self._fields = tuple(fields) + self._gen_fields() + + def _gen_fields(self): + """Precompute useful metadata on self.fields.""" + self._fields_desc = {} + offset = 0 + for name, field in self._fields: + # For reflexion + field._set_self_type(self) + self._fields_desc[name] = {"field": field, "offset": offset} + offset += field.size() + + @property + def fields(self): + return self._fields + + def set(self, vm, addr, val): + raw = str(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_field(self, vm, addr, name): + """Get a field value by @name and base structure @addr in @vm VmMngr.""" + if name not in self._fields_desc: + raise ValueError("'%s' type has no field '%s'" % (self, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + return field.get(vm, addr + offset) + + def set_field(self, vm, addr, name, val): + """Set a field value by @name and base structure @addr in @vm VmMngr. + @val is the python value corresponding to this field type. 
+ """ + if name not in self._fields_desc: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + field.set(vm, addr + offset, val) + + def size(self): + return sum(field.size() for _, field in self.fields) + + def get_offset(self, field_name): + """ + @field_name: (str, optional) the name of the field to get the + offset of + """ + if field_name not in self._fields_desc: + raise ValueError("This structure has no %s field" % field_name) + return self._fields_desc[field_name]['offset'] + + def get_field_type(self, name): + """Return the Type subclass instance describing field @name.""" + return self._fields_desc[name]['field'] + + def _get_pinned_base_class(self): + return MemStruct + + def __repr__(self): + return "struct %s" % self.name + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.fields == other.fields and \ + self.name == other.name + + def __hash__(self): + # Only hash name, not fields, because if a field is a Ptr to this + # Struct type, an infinite loop occurs + return hash((self.__class__, self.name)) + + +class Union(Struct): + """Represents a C union. + + Allows to put multiple fields at the same offset in a MemStruct, + similar to unions in C. The Union will have the size of the largest of its + fields. + + Mapped to MemUnion. 
+ + Example: + + class Example(MemStruct): + fields = [("uni", Union([ + ("f1", Num("= self.size()): + raise IndexError("Index %s out of bounds" % idx) + + def _get_pinned_base_class(self): + if self.is_sized(): + return MemSizedArray + else: + return MemArray + + def __repr__(self): + return "%r[%s]" % (self.field_type, self.array_len or "unsized") + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.field_type == other.field_type and \ + self.array_len == other.array_len + + def __hash__(self): + return hash((self.__class__, self.field_type, self.array_len)) + + +class Bits(Type): + """Helper class for BitField, not very useful on its own. Represents some + bits of a Num. + + The @backing_num is used to know how to serialize/deserialize data in vm, + but getting/setting this fields only affects bits from @bit_offset to + @bit_offset + @bits. Masking and shifting is handled by the class, the aim + is to provide a transparent way to set and get some bits of a num. 
+ """ + + def __init__(self, backing_num, bits, bit_offset): + if not isinstance(backing_num, Num): + raise ValueError("backing_num should be a Num instance") + self._num = backing_num + self._bits = bits + self._bit_offset = bit_offset + + def set(self, vm, addr, val): + val_mask = (1 << self._bits) - 1 + val_shifted = (val & val_mask) << self._bit_offset + num_size = self._num.size() * 8 + + full_num_mask = (1 << num_size) - 1 + num_mask = (~(val_mask << self._bit_offset)) & full_num_mask + + num_val = self._num.get(vm, addr) + res_val = (num_val & num_mask) | val_shifted + self._num.set(vm, addr, res_val) + + def get(self, vm, addr): + val_mask = (1 << self._bits) - 1 + num_val = self._num.get(vm, addr) + res_val = (num_val >> self._bit_offset) & val_mask + return res_val + + def size(self): + return self._num.size() + + @property + def bit_size(self): + """Number of bits read/written by this class""" + return self._bits + + @property + def bit_offset(self): + """Offset in bits (beginning at 0, the LSB) from which to read/write + bits. + """ + return self._bit_offset + + def __repr__(self): + return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, + self._bit_offset, self._bit_offset + self._bits) + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and self._bits == other._bits and \ + self._bit_offset == other._bit_offset + + def __hash__(self): + return hash((self.__class__, self._num, self._bits, self._bit_offset)) + + +class BitField(Union): + """A C-like bitfield. + + Constructed with a list [(, )] and a + @backing_num. The @backing_num is a Num instance that determines the total + size of the bitfield and the way the bits are serialized/deserialized (big + endian int, little endian short...). Can be seen (and implemented) as a + Union of Bits fields. + + Mapped to MemBitField. + + Creates fields that allow to access the bitfield fields easily. 
Example: + + class Example(MemStruct): + fields = [("bf", BitField(Num("B"), [ + ("f1", 2), + ("f2", 4), + ("f3", 1) + ]) + )] + + ex = Example(vm, addr) + ex.memset() + ex.f2 = 2 + ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated + assert ex.f1 == 3 + assert ex.f2 == 2 + assert ex.f3 == 0 # previously memset() + assert ex.bf == 3 + 2 << 2 + """ + + def __init__(self, backing_num, bit_list): + """@backing num: Num intance, @bit_list: [(name, n_bits)]""" + self._num = backing_num + fields = [] + offset = 0 + for name, bits in bit_list: + fields.append((name, Bits(self._num, bits, offset))) + offset += bits + if offset > self._num.size() * 8: + raise ValueError("sum of bit lengths is > to the backing num size") + super(BitField, self).__init__(fields) + + def set(self, vm, addr, val): + self._num.set(vm, addr, val) + + def _get_pinned_base_class(self): + return MemBitField + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and super(BitField, self).__eq__(other) + + def __hash__(self): + return hash((super(BitField, self).__hash__(), self._num)) + + def __repr__(self): + fields_repr = ', '.join("%s: %r" % (name, field.bit_size) + for name, field in self.fields) + return "%s(%s)" % (self.__class__.__name__, fields_repr) + + +class Str(Type): + """A string type that handles encoding. This type is unsized (no static + size). + + The @encoding is passed to the constructor, and is currently either null + terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware + that the utf16 implementation is a bit buggy... + + Mapped to MemStr. 
+ """ + + def __init__(self, encoding="ansi"): + # TODO: encoding as lambda + if encoding not in ["ansi", "utf16"]: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + self._enc = encoding + + def get(self, vm, addr): + """Set the string value in memory""" + if self._enc == "ansi": + get_str = get_str_ansi + elif self._enc == "utf16": + get_str = get_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + return get_str(vm, addr) + + def set(self, vm, addr, s): + """Get the string value from memory""" + if self._enc == "ansi": + set_str = set_str_ansi + elif self._enc == "utf16": + set_str = set_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + set_str(vm, addr, s) + + def size(self): + """This type is unsized.""" + raise ValueError("Str is unsized") + + @property + def enc(self): + """This Str's encoding name (as a str).""" + return self._enc + + def _get_pinned_base_class(self): + return MemStr + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self.enc) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._enc == other._enc + + def __hash__(self): + return hash((self.__class__, self._enc)) + + +class Void(Type): + """Represents the C void type. + + Mapped to MemVoid. + """ + + def _build_pinned_type(self): + return MemVoid + + def __eq__(self, other): + return self.__class__ == other.__class__ + + def __hash__(self): + return hash(self.__class__) + + +class Self(Void): + """Special marker to reference a type inside itself. + + Mapped to MemSelf. + + Example: + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ) + - instances of this class will have properties to interract with these + fields. 
+ + Example: + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + + mstruct = MyStruct(vm, addr) + + # Field assignment modifies virtual memory + mstruct.num = 3 + assert mstruct.num == 3 + memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), + 4))[0] + assert memval == mstruct.num + + # Memset sets the whole structure + mstruct.memset() + assert mstruct.num == 0 + mstruct.memset('\x11') + assert mstruct.num == 0x11111111 + + other = OtherStruct(vm, addr2) + mstruct.other = other.get_addr() + assert mstruct.other.val == other.get_addr() + assert mstruct.other.deref == other + assert mstruct.other.deref.foo == 0x1234 + + Note that: + MyStruct = Struct("MyStruct", ).pinned + is equivalent to the previous MyStruct declaration. + + See the various Type-s doc for more information. See MemStruct.gen_fields + doc for more information on how to handle recursive types and cyclic + dependencies. + """ + __metaclass__ = _MetaMemStruct + fields = None + + def get_addr(self, field_name=None): + """ + @field_name: (str, optional) the name of the field to get the + address of + """ + if field_name is not None: + offset = self._type.get_offset(field_name) + else: + offset = 0 + return self._addr + offset + + def get_field(self, name): + """Get a field value by name. + + useless most of the time since fields are accessible via self.. + """ + return self._type.get_field(self._vm, self.get_addr(), name) + + def set_field(self, name, val): + """Set a field value by name. @val is the python value corresponding to + this field type. + + useless most of the time since fields are accessible via self.. 
+ """ + return self._type.set_field(self._vm, self.get_addr(), name, val) + + def cast_field(self, field, other_type): + """In this implementation, @field is a field name""" + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr(field)) + + # Field generation method, voluntarily public to be able to gen fields + # after class definition + @classmethod + def gen_fields(cls, fields=None): + """Generate the fields of this class (so that they can be accessed with + self.) from a @fields list, as described in the class doc. + + Useful in case of a type cyclic dependency. For example, the following + is not possible in python: + + class A(MemStruct): + fields = [("b", Ptr("I", B))] + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + With gen_fields, the following is the legal equivalent: + + class A(MemStruct): + pass + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + A.gen_fields([("b", Ptr("I", B))]) + """ + if fields is not None: + if cls.fields is not None: + raise ValueError("Cannot regen fields of a class. Setting " + "cls.fields at class definition and calling " + "gen_fields are mutually exclusive.") + cls.fields = fields + + if cls._type is None: + if cls.fields is None: + raise ValueError("Cannot create a MemStruct subclass without" + " a cls._type or a cls.fields") + cls._type = cls._gen_type(cls.fields) + + if cls._type in DYN_MEM_STRUCT_CACHE: + # FIXME: Maybe a warning would be better? + raise RuntimeError("Another MemType has the same type as this " + "one. Use it instead.") + + # Register this class so that another one will not be created when + # calling cls._type.pinned + DYN_MEM_STRUCT_CACHE[cls._type] = cls + + cls._gen_attributes() + + @classmethod + def _gen_attributes(cls): + # Generate self. 
getter and setters + for name, field in cls._type.fields: + setattr(cls, name, property( + lambda self, name=name: self.get_field(name), + lambda self, val, name=name: self.set_field(name, val) + )) + + @classmethod + def _gen_type(cls, fields): + return Struct(cls.__name__, fields) + + def __repr__(self): + out = [] + for name, field in self._type.fields: + val_repr = repr(self.get_field(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + + +class MemUnion(MemStruct): + """Same as MemStruct but all fields have a 0 offset in the struct.""" + @classmethod + def _gen_type(cls, fields): + return Union(fields) + + +class MemBitField(MemUnion): + """MemUnion of Bits(...) fields.""" + @classmethod + def _gen_type(cls, fields): + return BitField(fields) + + +class MemSelf(MemStruct): + """Special Marker class for reference to current class in a Ptr or Array + (mostly Array of Ptr). See Self doc. + """ + def __repr__(self): + return self.__class__.__name__ + + +class MemVoid(MemType): + """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when + casted to another type. Allows to implement C's "void*" pattern. 
+ """ + _type = Void() + + def __repr__(self): + return self.__class__.__name__ + + +class MemPtr(MemValue): + """Mem version of a Ptr, provides two properties: + - val, to set and get the numeric value of the Ptr + - deref, to set and get the pointed type + """ + @property + def val(self): + return self._type.get_val(self._vm, self._addr) + + @val.setter + def val(self, value): + return self._type.set_val(self._vm, self._addr, value) + + @property + def deref(self): + return self._type.deref_get(self._vm, self._addr) + + @deref.setter + def deref(self, val): + return self._type.deref_set(self._vm, self._addr, val) + + def __repr__(self): + return "*%s" % hex(self.val) + + +class MemStr(MemValue): + """Implements a string representation in memory. + + The string value can be got or set (with python str/unicode) through the + self.val attribute. String encoding/decoding is handled by the class, + + This type is dynamically sized only (get_size is implemented, not sizeof). + """ + + def get_size(self): + """This get_size implementation is quite unsafe: it reads the string + underneath to determine the size, it may therefore read a lot of memory + and provoke mem faults (analogous to strlen). + """ + val = self.val + if self.get_type().enc == "ansi": + return len(val) + 1 + elif self.get_type().enc == "utf16": + # FIXME: real encoding... + return len(val) * 2 + 2 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + + def raw(self): + raw = self._vm.get_mem(self.get_addr(), self.get_size()) + return raw + + def __repr__(self): + return "%r: %r" % (self.__class__, self.val) + + +class MemArray(MemType): + """An unsized array of type @field_type (a Type subclass instance). + This class has no static or dynamic size. 
+ + It can be indexed for setting and getting elements, example: + + array = Array(Num("I")).pinned(vm, addr)) + array[2] = 5 + array[4:8] = [0, 1, 2, 3] + print array[20] + """ + + @property + def field_type(self): + """Return the Type subclass instance that represents the type of + this MemArray items. + """ + return self.get_type().field_type + + def get_addr(self, idx=0): + return self._addr + self.get_type().get_offset(idx) + + def __getitem__(self, idx): + return self.get_type().get_item(self._vm, self._addr, idx) + + def __setitem__(self, idx, item): + self.get_type().set_item(self._vm, self._addr, idx, item) + + def raw(self): + raise ValueError("%s is unsized, which prevents from getting its full " + "raw representation. Use MemSizedArray instead." % + self.__class__) + + def __repr__(self): + return "[%r, ...] [%r]" % (self[0], self.field_type) + + +class MemSizedArray(MemArray): + """A fixed size MemArray. + + This type is dynamically sized. Generate a fixed @field_type and @array_len + array which has a static size by using Array(type, size).pinned. 
+ """ + + @property + def array_len(self): + """The length, in number of elements, of this array.""" + return self.get_type().array_len + + def get_size(self): + return self.get_type().size() + + def __iter__(self): + for i in xrange(self.get_type().array_len): + yield self[i] + + def raw(self): + return self._vm.get_mem(self.get_addr(), self.get_size()) + + def __repr__(self): + item_reprs = [repr(item) for item in self] + if self.array_len > 0 and '\n' in item_reprs[0]: + items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' + else: + items = ', '.join(item_reprs) + return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) + diff --git a/test/analysis/mem.py b/test/analysis/mem.py deleted file mode 100644 index 6c7fc9e3..00000000 --- a/test/analysis/mem.py +++ /dev/null @@ -1,506 +0,0 @@ -#!/usr/bin/env python - -# miasm2.analysis.mem tests - -import struct - -from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, Num, Ptr, Str, \ - Array, RawStruct, Union, \ - BitField, Self, Void, Bits, \ - set_allocator, MemUnion, Struct -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.os_dep.common import heap - -# Two structures with some fields -class OtherStruct(MemStruct): - fields = [ - ("foo", Num("H")), - ] - -class MyStruct(MemStruct): - fields = [ - # Number field: just struct.pack fields with one value - ("num", Num("I")), - ("flags", Num("B")), - # This field is a pointer to another struct, it has a numeric - # value (mystruct.other.val) and can be dereferenced to get an - # OtherStruct instance (mystruct.other.deref) - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -jitter = Machine("x86_32").jitter("python") -jitter.init_stack() -addr = 0x1000 -size = 0x1000 -addr_str = 0x1100 -addr_str2 = 0x1200 -addr_str3 = 0x1300 -# Initialize all mem with 0xaa -jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) - - 
-# MemStruct tests -## Creation -# Use manual allocation with explicit addr for the first example -mstruct = MyStruct(jitter.vm, addr) -## Fields are read from the virtual memory -assert mstruct.num == 0xaaaaaaaa -assert mstruct.flags == 0xaa - -## Field assignment modifies virtual memory -mstruct.num = 3 -assert mstruct.num == 3 -memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] -assert memval == 3 - -## Memset sets the whole structure -mstruct.memset() -assert mstruct.num == 0 -assert mstruct.flags == 0 -assert mstruct.other.val == 0 -assert mstruct.s.val == 0 -assert mstruct.i.val == 0 -mstruct.memset('\x11') -assert mstruct.num == 0x11111111 -assert mstruct.flags == 0x11 -assert mstruct.other.val == 0x11111111 -assert mstruct.s.val == 0x11111111 -assert mstruct.i.val == 0x11111111 - - -# From now, just use heap.vm_alloc -my_heap = heap() -set_allocator(my_heap.vm_alloc) - - -# Ptr tests -## Setup for Ptr tests -# the addr field can now be omited since allocator is set -other = OtherStruct(jitter.vm) -other.foo = 0x1234 -assert other.foo == 0x1234 - -## Basic usage -mstruct.other.val = other.get_addr() -# This also works for now: -# mstruct.other = other.get_addr() -assert mstruct.other.val == other.get_addr() -assert mstruct.other.deref == other -assert mstruct.other.deref.foo == 0x1234 - -## Deref assignment -other2 = OtherStruct(jitter.vm) -other2.foo = 0xbeef -assert mstruct.other.deref != other2 -mstruct.other.deref = other2 -assert mstruct.other.deref == other2 -assert mstruct.other.deref.foo == 0xbeef -assert mstruct.other.val == other.get_addr() # Addr did not change -assert other.foo == 0xbeef # Deref assignment copies by value -assert other2.foo == 0xbeef -assert other.get_addr() != other2.get_addr() # Not the same address -assert other == other2 # But same value - -## Same stuff for Ptr to MemField -alloc_addr = my_heap.vm_alloc(jitter.vm, - mstruct.get_type().get_field_type("i") - .dst_type.sizeof()) -mstruct.i = alloc_addr 
-mstruct.i.deref.val = 8 -assert mstruct.i.deref.val == 8 -assert mstruct.i.val == alloc_addr -memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] -assert memval == 8 - - -# Str tests -## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) -memstr.val = "" -assert memstr.val == "" -assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' -memstr.val = "lala" -assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' -jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') -assert memstr.val == 'MIAMs' - -## Ptr(Str()) manipulations -mstruct.s.val = memstr.get_addr() -assert mstruct.s.val == addr_str -assert mstruct.s.deref == memstr -assert mstruct.s.deref.val == 'MIAMs' -mstruct.s.deref.val = "That's all folks!" -assert mstruct.s.deref.val == "That's all folks!" -assert memstr.val == "That's all folks!" - -## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) -memstr2.val = "That's all folks!" -assert memstr2.get_addr() != memstr.get_addr() -assert memstr2 == memstr - -## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) -memstr3.val = "That's all folks!" 
-assert memstr3.get_addr() != memstr.get_addr() -assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Mem representation is different -assert memstr3 != memstr # Encoding is different, so they are not eq -assert memstr3.val == memstr.val # But the python value is the same - - -# Array tests -# Allocate buffer manually, since memarray is unsized -alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) -memarray[0] = 0x02 -assert memarray[0] == 0x02 -assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' -memarray[2] = 0xbbbbbbbb -assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' -try: - s = str(memarray) - assert False, "Should raise" -except (NotImplementedError, ValueError): - pass -try: - s = len(memarray) - assert False, "Should raise" -except (NotImplementedError, ValueError): - pass - -## Slice assignment -memarray[2:4] = [3, 3] -assert memarray[2] == 3 -assert memarray[3] == 3 -assert memarray[2:4] == [3, 3] -try: - memarray[2:4] = [3, 3, 3] - assert False, "Should raise, mismatched sizes" -except ValueError: - pass - - -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 -memsarray.memset('\xcc') -assert memsarray[0] == 0xcccccccc -assert len(memsarray) == 10 * 4 -assert str(memsarray) == '\xcc' * (4 * 10) -for val in memsarray: - assert val == 0xcccccccc -assert list(memsarray) == [0xcccccccc] * 10 -memsarray[0] = 2 -assert memsarray[0] == 2 -assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) - - -# Atypical fields (RawStruct and Array) -class MyStruct2(MemStruct): - fields = [ - ("s1", RawStruct("=BI")), - ("s2", Array(Num("B"), 10)), - ] - -ms2 = MyStruct2(jitter.vm) -ms2.memset('\xaa') -assert len(ms2) == 15 
- -## RawStruct -assert len(ms2.s1) == 2 -assert ms2.s1[0] == 0xaa -assert ms2.s1[1] == 0xaaaaaaaa - -## Array -### Basic checks -assert len(ms2.s2) == 10 -for val in ms2.s2: - assert val == 0xaa -assert ms2.s2[0] == 0xaa -assert ms2.s2[9] == 0xaa - -### Subscript assignment -ms2.s2[3] = 2 -assert ms2.s2[3] == 2 - -### Field assignment (list) -ms2.s2 = [1] * 10 -for val in ms2.s2: - assert val == 1 - -### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) -jitter.vm.set_mem(array2.get_addr(), '\x02'*10) -for val in array2: - assert val == 2 -ms2.s2 = array2 -for val in ms2.s2: - assert val == 2 - - -# Inlining a MemType tests -class InStruct(MemStruct): - fields = [ - ("foo", Num("B")), - ("bar", Num("B")), - ] - -class ContStruct(MemStruct): - fields = [ - ("one", Num("B")), - ("instruct", InStruct.get_type()), - ("last", Num("B")), - ] - -cont = ContStruct(jitter.vm) -cont.memset() -assert len(cont) == 4 -assert len(cont.instruct) == 2 -assert cont.one == 0 -assert cont.last == 0 -assert cont.instruct.foo == 0 -assert cont.instruct.bar == 0 -cont.memset('\x11') -assert cont.one == 0x11 -assert cont.last == 0x11 -assert cont.instruct.foo == 0x11 -assert cont.instruct.bar == 0x11 - -cont.one = 0x01 -cont.instruct.foo = 0x02 -cont.instruct.bar = 0x03 -cont.last = 0x04 -assert cont.one == 0x01 -assert cont.instruct.foo == 0x02 -assert cont.instruct.bar == 0x03 -assert cont.last == 0x04 -assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' - - -# Union test -class UniStruct(MemStruct): - fields = [ - ("one", Num("B")), - ("union", Union([ - ("instruct", InStruct.get_type()), - ("i", Num(">I")), - ])), - ("last", Num("B")), - ] - -uni = UniStruct(jitter.vm) -jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) -assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 -assert uni.one == 0x00 -assert uni.union.instruct.foo == 0x01 -assert uni.union.instruct.bar == 0x02 -assert uni.union.i == 
0x01020304 -assert uni.last == 0x05 -uni.union.instruct.foo = 0x02 -assert uni.union.i == 0x02020304 -uni.union.i = 0x11223344 -assert uni.union.instruct.foo == 0x11 -assert uni.union.instruct.bar == 0x22 - - -# BitField test -class BitStruct(MemUnion): - fields = [ - ("flags_num", Num("H")), - ("flags", BitField(Num("H"), [ - ("f1_1", 1), - ("f2_5", 5), - ("f3_8", 8), - ("f4_1", 1), - ])), - ] - -bit = BitStruct(jitter.vm) -bit.memset() -assert bit.flags_num == 0 -assert bit.flags.f1_1 == 0 -assert bit.flags.f2_5 == 0 -assert bit.flags.f3_8 == 0 -assert bit.flags.f4_1 == 0 -bit.flags.f1_1 = 1 -bit.flags.f2_5 = 0b10101 -bit.flags.f3_8 = 0b10000001 -assert bit.flags_num == 0b0010000001101011 -assert bit.flags.f1_1 == 1 -assert bit.flags.f2_5 == 0b10101 -assert bit.flags.f3_8 == 0b10000001 -assert bit.flags.f4_1 == 0 -bit.flags_num = 0b1101010101011100 -assert bit.flags.f1_1 == 0 -assert bit.flags.f2_5 == 0b01110 -assert bit.flags.f3_8 == 0b01010101 -assert bit.flags.f4_1 == 1 - - -# Unhealthy ideas -class UnhealthyIdeas(MemStruct): - fields = [ - ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), - ("apstr", Array(Ptr("I", Str()), 10)), - ("pself", Ptr("I", Self())), - ("apself", Array(Ptr("I", Self()), 2)), - ("ppself", Ptr("I", Ptr("I", Self()))), - ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), - ] - -p_size = Ptr("I", Void()).size() - -ideas = UnhealthyIdeas(jitter.vm) -ideas.memset() -ideas.pself = ideas.get_addr() -assert ideas == ideas.pself.deref - -ideas.apself[0] = ideas.get_addr() -assert ideas.apself[0].deref == ideas -ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.apself[1].deref = ideas -assert ideas.apself[1] != ideas.get_addr() -assert ideas.apself[1].deref == ideas - -ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.ppself.deref.val = ideas.get_addr() -assert ideas.ppself.deref.val == ideas.get_addr() -assert ideas.ppself.deref.deref == ideas - -ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, 
UnhealthyIdeas.sizeof()) -ideas.ppself.deref.deref = ideas -assert ideas.ppself.deref.val != ideas.get_addr() -assert ideas.ppself.deref.deref == ideas - -ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) -ideas.pppself.deref.deref.val = ideas.get_addr() -assert ideas.pppself.deref.deref.deref == ideas - - -# Circular dependencies -class A(MemStruct): - pass - -class B(MemStruct): - fields = [("a", Ptr("I", A)),] - -# Gen A's fields after declaration -A.gen_fields([("b", Ptr("I", B)),]) - -a = A(jitter.vm) -b = B(jitter.vm) -a.b.val = b.get_addr() -b.a.val = a.get_addr() -assert a.b.deref == b -assert b.a.deref == a - - -# Cast tests -# MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned -dword = MemInt(jitter.vm) -dword.val = 0x12345678 -assert isinstance(dword.cast(MemShort), MemShort) -assert dword.cast(MemShort).val == 0x5678 - -# Field cast -ms2.s2[0] = 0x34 -ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", MemShort).val == 0x1234 - -# Other method -assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 - -# Manual cast inside an Array -ms2.s2[4] = 0xcd -ms2.s2[5] = 0xab -assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd - -# void* style cast -MemPtrVoid = Ptr("I", Void()).pinned -p = MemPtrVoid(jitter.vm) -p.val = mstruct.get_addr() -assert p.deref.cast(MyStruct) == mstruct -assert p.cast(Ptr("I", MyStruct)).deref == mstruct - -# Field equality tests -assert RawStruct("IH") == RawStruct("IH") -assert RawStruct("I") != RawStruct("IH") -assert Num("I") == Num("I") -assert Num(">I") != Num("I", MyStruct) != Ptr("I")), + ])), + ("last", Num("B")), + ] + +uni = UniStruct(jitter.vm) +jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) +assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 +assert uni.one == 0x00 +assert uni.union.instruct.foo == 0x01 +assert uni.union.instruct.bar == 0x02 +assert uni.union.i == 0x01020304 +assert uni.last == 
0x05 +uni.union.instruct.foo = 0x02 +assert uni.union.i == 0x02020304 +uni.union.i = 0x11223344 +assert uni.union.instruct.foo == 0x11 +assert uni.union.instruct.bar == 0x22 + + +# BitField test +class BitStruct(MemUnion): + fields = [ + ("flags_num", Num("H")), + ("flags", BitField(Num("H"), [ + ("f1_1", 1), + ("f2_5", 5), + ("f3_8", 8), + ("f4_1", 1), + ])), + ] + +bit = BitStruct(jitter.vm) +bit.memset() +assert bit.flags_num == 0 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0 +assert bit.flags.f3_8 == 0 +assert bit.flags.f4_1 == 0 +bit.flags.f1_1 = 1 +bit.flags.f2_5 = 0b10101 +bit.flags.f3_8 = 0b10000001 +assert bit.flags_num == 0b0010000001101011 +assert bit.flags.f1_1 == 1 +assert bit.flags.f2_5 == 0b10101 +assert bit.flags.f3_8 == 0b10000001 +assert bit.flags.f4_1 == 0 +bit.flags_num = 0b1101010101011100 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0b01110 +assert bit.flags.f3_8 == 0b01010101 +assert bit.flags.f4_1 == 1 + + +# Unhealthy ideas +class UnhealthyIdeas(MemStruct): + fields = [ + ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), + ("apstr", Array(Ptr("I", Str()), 10)), + ("pself", Ptr("I", Self())), + ("apself", Array(Ptr("I", Self()), 2)), + ("ppself", Ptr("I", Ptr("I", Self()))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), + ] + +p_size = Ptr("I", Void()).size() + +ideas = UnhealthyIdeas(jitter.vm) +ideas.memset() +ideas.pself = ideas.get_addr() +assert ideas == ideas.pself.deref + +ideas.apself[0] = ideas.get_addr() +assert ideas.apself[0].deref == ideas +ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.apself[1].deref = ideas +assert ideas.apself[1] != ideas.get_addr() +assert ideas.apself[1].deref == ideas + +ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.ppself.deref.val = ideas.get_addr() +assert ideas.ppself.deref.val == ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) 
+ideas.ppself.deref.deref = ideas +assert ideas.ppself.deref.val != ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.deref.val = ideas.get_addr() +assert ideas.pppself.deref.deref.deref == ideas + + +# Circular dependencies +class A(MemStruct): + pass + +class B(MemStruct): + fields = [("a", Ptr("I", A)),] + +# Gen A's fields after declaration +A.gen_fields([("b", Ptr("I", B)),]) + +a = A(jitter.vm) +b = B(jitter.vm) +a.b.val = b.get_addr() +b.a.val = a.get_addr() +assert a.b.deref == b +assert b.a.deref == a + + +# Cast tests +# MemStruct cast +MemInt = Num("I").pinned +MemShort = Num("H").pinned +dword = MemInt(jitter.vm) +dword.val = 0x12345678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).val == 0x5678 + +# Field cast +ms2.s2[0] = 0x34 +ms2.s2[1] = 0x12 +assert ms2.cast_field("s2", MemShort).val == 0x1234 + +# Other method +assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 + +# Manual cast inside an Array +ms2.s2[4] = 0xcd +ms2.s2[5] = 0xab +assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd + +# void* style cast +MemPtrVoid = Ptr("I", Void()).pinned +p = MemPtrVoid(jitter.vm) +p.val = mstruct.get_addr() +assert p.deref.cast(MyStruct) == mstruct +assert p.cast(Ptr("I", MyStruct)).deref == mstruct + +# Field equality tests +assert RawStruct("IH") == RawStruct("IH") +assert RawStruct("I") != RawStruct("IH") +assert Num("I") == Num("I") +assert Num(">I") != Num("I", MyStruct) != Ptr(" Date: Fri, 4 Dec 2015 10:16:37 +0100 Subject: MemStruct/Types: pinned renamed to lval --- example/jitter/types.py | 2 ++ miasm2/core/types.py | 79 ++++++++++++++++++++++++++++++++++--------------- test/core/types.py | 44 +++++++++++++-------------- 3 files changed, 79 insertions(+), 46 deletions(-) (limited to 'test') diff --git a/example/jitter/types.py b/example/jitter/types.py 
index 6d8543b4..f4a7ddb4 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -229,6 +229,8 @@ print "See that the original array has been modified:" print repr(data) print +# TODO: type manipulation examples + print "See test/core/types.py and the miasm2.core.types module doc for " print "more information." diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 3c8d5b8b..d6bc3cf5 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -1,5 +1,34 @@ -"""This module provides classes to manipulate C structures backed by a VmMngr -object (a miasm sandbox virtual memory). +"""This module provides classes to manipulate pure C types as well as their +representation in memory. A typical usecase is to use this module to +easily manipylate structures backed by a VmMngr object (a miasm sandbox virtual +memory): + + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ): - # FIXME: "I" => "u32" class MyStruct(MemStruct): fields = [ # Scalar field: just struct.pack field with one value @@ -210,10 +238,13 @@ class Type(object): return self._unpack(raw) @property - def pinned(self): + def lval(self): """Returns a class with a (vm, addr) constructor that allows to interact with this type in memory. + In compilation terms, it returns a class allowing to instanciate an + lvalue of this type. + @return: a MemType subclass. """ if self in DYN_MEM_STRUCT_CACHE: @@ -225,7 +256,7 @@ class Type(object): def _build_pinned_type(self): """Builds the MemType subclass allowing to interract with this type. - Called by self.pinned when it is not in cache. + Called by self.lval when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() pinned_type = type("Mem%r" % self, (pinned_base_class,), @@ -324,7 +355,7 @@ class Ptr(Num): in memory @dst_type: (MemType or Type) the MemType this Ptr points to. If a Type is given, it is transformed into a MemType with - TheType.pinned. + TheType.lval. 
*type_args, **type_kwargs: arguments to pass to the the pointed MemType when instanciating it (e.g. for MemStr encoding or MemArray field_type). @@ -342,10 +373,10 @@ class Ptr(Num): dst_type._get_self_type = lambda: self._get_self_type() # dst_type cannot be patched here, since _get_self_type of the outer # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point + # dst_type.lval, which will only return a type that does not point # on MemSelf but on the right class only when _get_self_type of the # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.pinned is not valid here, it is done + # In short, dst_type = dst_type.lval is not valid here, it is done # lazily in _fix_dst_type self._dst_type = dst_type self._type_args = type_args @@ -358,7 +389,7 @@ class Ptr(Num): else: raise ValueError("Unsupported usecase for MemSelf, sorry") if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned + self._dst_type = self._dst_type.lval @property def dst_type(self): @@ -374,7 +405,7 @@ class Ptr(Num): super(Ptr, self).set(vm, addr, val) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_val(self, vm, addr): """Get the numeric value of a Ptr""" @@ -429,14 +460,14 @@ class Struct(Type): Mapped to MemStruct. - NOTE: The `.pinned` property of Struct creates classes on the fly. If an + NOTE: The `.lval` property of Struct creates classes on the fly. If an equivalent structure is created by subclassing MemStruct, an exception is raised to prevent creating multiple classes designating the same type. 
Example: s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - Toto1 = s.pinned + Toto1 = s.lval # This raises an exception, because it describes the same structure as # Toto1 @@ -469,7 +500,7 @@ class Struct(Type): vm.set_mem(addr, raw) def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def get_field(self, vm, addr, name): """Get a field value by @name and base structure @addr in @vm VmMngr.""" @@ -618,7 +649,7 @@ class Array(Type): "Assignment only implemented for list and MemSizedArray") def get(self, vm, addr): - return self.pinned(vm, addr) + return self.lval(vm, addr) def size(self): if self.is_sized(): @@ -949,7 +980,7 @@ class MemType(object): virtual memory. Globally, MemTypes are not meant to be used directly: specialized - subclasses are generated by Type(...).pinned and should be used instead. + subclasses are generated by Type(...).lval and should be used instead. The main exception is MemStruct, which you may want to subclass yourself for syntactic ease. """ @@ -1022,11 +1053,11 @@ class MemType(object): """Cast this MemType to another MemType (same address, same vm, but different type). Return the casted MemType. - @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr()) def cast_field(self, field, other_type, *type_args, **type_kwargs): @@ -1035,7 +1066,7 @@ class MemType(object): @field: field specification, for example its name for a struct, or an index in an array. See the subclass doc. 
- @other_type: either a Type instance (other_type.pinned is used) or a + @other_type: either a Type instance (other_type.lval is used) or a MemType subclass """ raise NotImplementedError("Abstract") @@ -1127,7 +1158,7 @@ class MemStruct(MemType): assert mstruct.other.deref.foo == 0x1234 Note that: - MyStruct = Struct("MyStruct", ).pinned + MyStruct = Struct("MyStruct", ).lval is equivalent to the previous MyStruct declaration. See the various Type-s doc for more information. See MemStruct.gen_fields @@ -1166,7 +1197,7 @@ class MemStruct(MemType): def cast_field(self, field, other_type): """In this implementation, @field is a field name""" if isinstance(other_type, Type): - other_type = other_type.pinned + other_type = other_type.lval return other_type(self._vm, self.get_addr(field)) # Field generation method, voluntarily public to be able to gen fields @@ -1214,7 +1245,7 @@ class MemStruct(MemType): "one. Use it instead.") # Register this class so that another one will not be created when - # calling cls._type.pinned + # calling cls._type.lval DYN_MEM_STRUCT_CACHE[cls._type] = cls cls._gen_attributes() @@ -1336,7 +1367,7 @@ class MemArray(MemType): It can be indexed for setting and getting elements, example: - array = Array(Num("I")).pinned(vm, addr)) + array = Array(Num("I")).lval(vm, addr)) array[2] = 5 array[4:8] = [0, 1, 2, 3] print array[20] @@ -1371,7 +1402,7 @@ class MemSizedArray(MemArray): """A fixed size MemArray. This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. + array which has a static size by using Array(type, size).lval. 
""" @property diff --git a/test/core/types.py b/test/core/types.py index db72449c..f1ff706b 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -119,7 +119,7 @@ assert memval == 8 # Str tests ## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) +memstr = Str().lval(jitter.vm, addr_str) memstr.val = "" assert memstr.val == "" assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' @@ -138,13 +138,13 @@ assert mstruct.s.deref.val == "That's all folks!" assert memstr.val == "That's all folks!" ## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) +memstr2 = Str().lval(jitter.vm, addr_str2) memstr2.val = "That's all folks!" assert memstr2.get_addr() != memstr.get_addr() assert memstr2 == memstr ## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) +memstr3 = Str("utf16").lval(jitter.vm, addr_str3) memstr3.val = "That's all folks!" assert memstr3.get_addr() != memstr.get_addr() assert memstr3.get_size() != memstr.get_size() # Size is different @@ -156,7 +156,7 @@ assert memstr3.val == memstr.val # But the python value is the same # Array tests # Allocate buffer manually, since memarray is unsized alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) +memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), @@ -188,8 +188,8 @@ except ValueError: pass -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types +memsarray = Array(Num("I"), 10).lval(jitter.vm) +# And Array(type, size).lval generates statically sized types assert memsarray.sizeof() == Num("I").size() * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc @@ -237,7 +237,7 @@ for val in ms2.s2: assert val == 1 ### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) +array2 = Array(Num("B"), 
10).lval(jitter.vm) jitter.vm.set_mem(array2.get_addr(), '\x02'*10) for val in array2: assert val == 2 @@ -406,8 +406,8 @@ assert b.a.deref == a # Cast tests # MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned +MemInt = Num("I").lval +MemShort = Num("H").lval dword = MemInt(jitter.vm) dword.val = 0x12345678 assert isinstance(dword.cast(MemShort), MemShort) @@ -427,7 +427,7 @@ ms2.s2[5] = 0xab assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd # void* style cast -MemPtrVoid = Ptr("I", Void()).pinned +MemPtrVoid = Ptr("I", Void()).lval p = MemPtrVoid(jitter.vm) p.val = mstruct.get_addr() assert p.deref.cast(MyStruct) == mstruct @@ -474,17 +474,17 @@ assert BitField(Num("B"), [("f1", 1), ("f2", 4), ("f3", 1)]) != \ BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) -# Quick MemField.pinned/MemField hash test -assert Num("f").pinned(jitter.vm, addr) == Num("f").pinned(jitter.vm, addr) +# Quick MemField.lval/MemField hash test +assert Num("f").lval(jitter.vm, addr) == Num("f").lval(jitter.vm, addr) # Types are cached -assert Num("f").pinned == Num("f").pinned -assert Num("d").pinned != Num("f").pinned -assert Union([("f1", Num("I")), ("f2", Num("H"))]).pinned == \ - Union([("f1", Num("I")), ("f2", Num("H"))]).pinned -assert Array(Num("B")).pinned == Array(Num("B")).pinned -assert Array(Num("I")).pinned != Array(Num("B")).pinned -assert Array(Num("B"), 20).pinned == Array(Num("B"), 20).pinned -assert Array(Num("B"), 19).pinned != Array(Num("B"), 20).pinned +assert Num("f").lval == Num("f").lval +assert Num("d").lval != Num("f").lval +assert Union([("f1", Num("I")), ("f2", Num("H"))]).lval == \ + Union([("f1", Num("I")), ("f2", Num("H"))]).lval +assert Array(Num("B")).lval == Array(Num("B")).lval +assert Array(Num("I")).lval != Array(Num("B")).lval +assert Array(Num("B"), 20).lval == Array(Num("B"), 20).lval +assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval # Repr tests @@ -496,8 +496,8 @@ print repr(cont), '\n' print repr(uni), 
'\n' print repr(bit), '\n' print repr(ideas), '\n' -print repr(Array(MyStruct2.get_type(), 2).pinned(jitter.vm, addr)), '\n' -print repr(Num("f").pinned(jitter.vm, addr)), '\n' +print repr(Array(MyStruct2.get_type(), 2).lval(jitter.vm, addr)), '\n' +print repr(Num("f").lval(jitter.vm, addr)), '\n' print repr(memarray) print repr(memsarray) print repr(memstr) -- cgit 1.4.1 From 36cae74bff4674396b35a208bc7ac57f0d4e2b6b Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sat, 5 Dec 2015 14:44:23 +0100 Subject: Types: Type size is now a property --- example/jitter/types.py | 2 +- miasm2/core/types.py | 35 +++++++++++++++++++++-------------- test/core/types.py | 10 +++++----- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'test') diff --git a/example/jitter/types.py b/example/jitter/types.py index 6c0b59af..e714372c 100644 --- a/example/jitter/types.py +++ b/example/jitter/types.py @@ -171,7 +171,7 @@ link.push(DataArray(vm)) assert link.size == 3 # If you get it directly from the VM, it is updated as well raw_size = vm.get_mem(link.get_addr("size"), link.get_type() - .get_field_type("size").size()) + .get_field_type("size").size) assert raw_size == '\x03\x00\x00\x00' print "The linked list just built:" diff --git a/miasm2/core/types.py b/miasm2/core/types.py index bf8f7823..03d23a5c 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -262,7 +262,7 @@ class Type(object): def get(self, vm, addr): """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) + raw = vm.get_mem(addr, self.size) return self._unpack(raw) @property @@ -306,12 +306,13 @@ class Type(object): """ self._self_type = self_type + @property def size(self): """Return the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): - return self.size() + return self.size def __neq__(self, other): return not self == other @@ -332,6 +333,7 @@ class RawStruct(Type): def _unpack(self, 
raw_str): return struct.unpack(self._fmt, raw_str) + @property def size(self): return struct.calcsize(self._fmt) @@ -517,7 +519,7 @@ class Struct(Type): # For reflexion field._set_self_type(self) self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() + offset += field.size @property def fields(self): @@ -549,8 +551,9 @@ class Struct(Type): offset = self.get_offset(name) field.set(vm, addr + offset, val) + @property def size(self): - return sum(field.size() for _, field in self.fields) + return sum(field.size for _, field in self.fields) def get_offset(self, field_name): """ @@ -609,8 +612,9 @@ class Union(Struct): """@field_list: a [(name, field)] list, see the class doc""" super(Union, self).__init__("union", field_list) + @property def size(self): - return max(field.size() for _, field in self.fields) + return max(field.size for _, field in self.fields) def get_offset(self, field_name): return 0 @@ -658,7 +662,7 @@ class Array(Type): def set(self, vm, addr, val): # MemSizedArray assignment if isinstance(val, MemSizedArray): - if val.array_len != self.array_len or len(val) != self.size(): + if val.array_len != self.array_len or len(val) != self.size: raise ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) @@ -670,7 +674,7 @@ class Array(Type): offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) - offset += self.field_type.size() + offset += self.field_type.size else: raise RuntimeError( @@ -679,6 +683,7 @@ class Array(Type): def get(self, vm, addr): return self.lval(vm, addr) + @property def size(self): if self.is_sized(): return self.get_offset(self.array_len) @@ -688,7 +693,7 @@ class Array(Type): def get_offset(self, idx): """Returns the offset of the item at index @idx.""" - return self.field_type.size() * idx + return self.field_type.size * idx def get_item(self, vm, addr, idx): """Get the item(s) at index @idx. 
@@ -745,7 +750,7 @@ class Array(Type): def _check_bounds(self, idx): if not isinstance(idx, (int, long)): raise ValueError("index must be an int or a long") - if idx < 0 or (self.is_sized() and idx >= self.size()): + if idx < 0 or (self.is_sized() and idx >= self.size): raise IndexError("Index %s out of bounds" % idx) def _get_pinned_base_class(self): @@ -786,7 +791,7 @@ class Bits(Type): def set(self, vm, addr, val): val_mask = (1 << self._bits) - 1 val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 + num_size = self._num.size * 8 full_num_mask = (1 << num_size) - 1 num_mask = (~(val_mask << self._bit_offset)) & full_num_mask @@ -801,8 +806,9 @@ class Bits(Type): res_val = (num_val >> self._bit_offset) & val_mask return res_val + @property def size(self): - return self._num.size() + return self._num.size @property def bit_size(self): @@ -868,7 +874,7 @@ class BitField(Union): for name, bits in bit_list: fields.append((name, Bits(self._num, bits, offset))) offset += bits - if offset > self._num.size() * 8: + if offset > self._num.size * 8: raise ValueError("sum of bit lengths is > to the backing num size") super(BitField, self).__init__(fields) @@ -968,6 +974,7 @@ class Str(Type): set_str = self.encodings[self.enc][1] set_str(vm, addr, s) + @property def size(self): """This type is unsized.""" raise ValueError("Str is unsized") @@ -1130,7 +1137,7 @@ class MemType(object): """Return the static size of this type. By default, it is the size of the underlying Type. """ - return cls._type.size() + return cls._type.size def get_size(self): """Return the dynamic size of this structure (e.g.
the size of an @@ -1518,7 +1525,7 @@ class MemSizedArray(MemArray): return self.get_type().array_len def get_size(self): - return self.get_type().size() + return self.get_type().size def __iter__(self): for i in xrange(self.get_type().array_len): diff --git a/test/core/types.py b/test/core/types.py index f1ff706b..de6034ef 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -160,11 +160,11 @@ memarray = Array(Num("I")).lval(jitter.vm, alloc_addr) memarray[0] = 0x02 assert memarray[0] == 0x02 assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' + Num("I").size) == '\x02\x00\x00\x00' memarray[2] = 0xbbbbbbbb assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' +assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size, + Num("I").size) == '\xbb\xbb\xbb\xbb' try: s = str(memarray) assert False, "Should raise" @@ -190,7 +190,7 @@ except ValueError: memsarray = Array(Num("I"), 10).lval(jitter.vm) # And Array(type, size).lval generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 +assert memsarray.sizeof() == Num("I").size * 10 memsarray.memset('\xcc') assert memsarray[0] == 0xcccccccc assert len(memsarray) == 10 * 4 @@ -356,7 +356,7 @@ class UnhealthyIdeas(MemStruct): ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), ] -p_size = Ptr("I", Void()).size() +p_size = Ptr("I", Void()).size ideas = UnhealthyIdeas(jitter.vm) ideas.memset() -- cgit 1.4.1 From e9ab0bd0f9c6dde642904cb473d57de9c81747b5 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sat, 5 Dec 2015 15:21:23 +0100 Subject: Types: short test to assert MemStruct unicity --- test/core/types.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'test') diff --git a/test/core/types.py b/test/core/types.py index de6034ef..96765fe7 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -486,6 +486,10 @@ assert Array(Num("I")).lval != 
Array(Num("B")).lval assert Array(Num("B"), 20).lval == Array(Num("B"), 20).lval assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval +# MemStruct unicity test +assert MyStruct == Struct(MyStruct.__name__, MyStruct.fields).lval +assert MyStruct.get_type() == Struct(MyStruct.__name__, MyStruct.fields) + # Repr tests -- cgit 1.4.1 From 0379f8e91fa54fe641948f01bb98a76fab47033a Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 14 Dec 2015 11:12:42 +0100 Subject: Types: adding the ("field", SomeMemType) syntax Shorthand for ("field", SomeMemStruct.get_type()) in a Struct or MemStruct fields definition. --- miasm2/core/types.py | 23 ++++++++++++++++++----- test/core/types.py | 5 +++-- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 03d23a5c..8d9687eb 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -507,19 +507,32 @@ class Struct(Type): def __init__(self, name, fields): self.name = name - # fields is immutable - self._fields = tuple(fields) - self._gen_fields() + # generates self._fields and self._fields_desc + self._gen_fields(fields) - def _gen_fields(self): + def _gen_fields(self, fields): """Precompute useful metadata on self.fields.""" self._fields_desc = {} offset = 0 - for name, field in self._fields: + + # Build a proper (name, Field()) list, handling cases where the user + # supplies a MemType subclass instead of a Type instance + real_fields = [] + for name, field in fields: + if isinstance(field, type) and issubclass(field, MemType): + if field._type is None: + raise ValueError("%r has no static type; use a subclasses " + "with a non null _type or use a " + "Type instance") + field = field.get_type() + real_fields.append((name, field)) + # For reflexion field._set_self_type(self) self._fields_desc[name] = {"field": field, "offset": offset} offset += field.size + # fields is immutable + self._fields = tuple(real_fields) @property def fields(self): 
diff --git a/test/core/types.py b/test/core/types.py index 96765fe7..c59a68d6 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -256,7 +256,8 @@ class InStruct(MemStruct): class ContStruct(MemStruct): fields = [ ("one", Num("B")), - ("instruct", InStruct.get_type()), + # Shorthand for: ("instruct", InStruct.get_type()), + ("instruct", InStruct), ("last", Num("B")), ] @@ -290,7 +291,7 @@ class UniStruct(MemStruct): fields = [ ("one", Num("B")), ("union", Union([ - ("instruct", InStruct.get_type()), + ("instruct", InStruct), ("i", Num(">I")), ])), ("last", Num("B")), -- cgit 1.4.1 From 03b3a84e0dd4d4d01b471f1767d4aec68b9a90ad Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Tue, 15 Dec 2015 16:35:31 +0100 Subject: Types: Support anonymous Struct/Union/BitField See the test addition for an example. A Struct, Union, or BitField field with no name will be considered anonymous: all its fields will be added to the parent Struct/Union/BitField. This implements this kind of C declaration: struct foo { int a; union { int bar; struct { short baz; short foz; }; }; } --- miasm2/core/types.py | 83 +++++++++++++++++++++++++++++++++++++++++++++------- test/core/types.py | 27 +++++++++++++++++ 2 files changed, 99 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 8d9687eb..4bab3bde 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -370,7 +370,7 @@ class Ptr(Num): MemType. Mapped to MemPtr (see its doc for more info): - + assert isinstance(mystruct.ptr, MemPtr) mystruct.ptr = 0x4000 # Assign the Ptr numeric value mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value @@ -503,6 +503,24 @@ class Struct(Type): # Toto1 class Toto(MemStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] + + Anonymous Struct, Union or BitField can be used if their field name + evaluates to False ("" or None). 
Such anonymous Struct field will generate + fields to the parent Struct, e.g.: + bla = Struct("Bla", [ + ("a", Num("B")), + ("", Union([("b1", Num("B")), ("b2", Num("H"))])), + ("", Struct("", [("c1", Num("B")), ("c2", Num("B"))])), + ] + Will have a b1, b2 and c1, c2 field directly accessible. The anonymous + fields are renamed to "__anon_<num>", with an incremented number. + + In such case, bla.fields will not contain b1, b2, c1 and c2 (only the 3 + actual fields, with the anonymous ones renamed), but bla.all_fields will + return the 3 fields + b1, b2, c1 and c2 (and an information telling if it + has been generated from an anonymous Struct/Union). + + bla.get_field(vm, addr, "b1") will work. """ def __init__(self, name, fields): @@ -518,24 +536,67 @@ class Struct(Type): # Build a proper (name, Field()) list, handling cases where the user # supplies a MemType subclass instead of a Type instance real_fields = [] - for name, field in fields: + uniq_count = 0 + for fname, field in fields: if isinstance(field, type) and issubclass(field, MemType): if field._type is None: raise ValueError("%r has no static type; use a subclasses " "with a non null _type or use a " "Type instance") field = field.get_type() - real_fields.append((name, field)) # For reflexion field._set_self_type(self) - self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size + + # Anonymous Struct/Union + if not fname and isinstance(field, Struct): + # Generate field information + updated_fields = { + name: { + # Same field type than the anon field subfield + 'field': fd['field'], + # But the current offset is added + 'offset': fd['offset'] + offset, + } + for name, fd in field._fields_desc.iteritems() + } + + # Add the newly generated fields from the anon field + self._fields_desc.update(updated_fields) + real_fields += [(name, fld, True) + for name, fld in field.fields] + + # Rename the anonymous field + fname = '__anon_%x' % uniq_count + uniq_count += 1 + +
self._fields_desc[fname] = {"field": field, "offset": offset} + real_fields.append((fname, field, False)) + offset = self._next_offset(field, offset) + # fields is immutable self._fields = tuple(real_fields) + def _next_offset(self, field, orig_offset): + return orig_offset + field.size + @property def fields(self): + """Returns a sequence of (name, field) describing the fields of this + Struct, in order of offset. + + Fields generated from anonymous Unions or Structs are excluded from + this sequence. + """ + return tuple((name, field) for name, field, anon in self._fields + if not anon) + + @property + def all_fields(self): + """Returns a sequence of (<name>, <field>, <is_anon>), + where is_anon is True when a field is generated from an anonymous + Struct or Union, and False for the fields that have been provided as is. + """ return self._fields def set(self, vm, addr, val): @@ -600,7 +661,7 @@ class Struct(Type): class Union(Struct): """Represents a C union. - + Allows to put multiple fields at the same offset in a MemStruct, similar to unions in C. The Union will have the size of the largest of its fields. @@ -629,8 +690,8 @@ class Union(Struct): def size(self): return max(field.size for _, field in self.fields) - def get_offset(self, field_name): - return 0 + def _next_offset(self, field, orig_offset): + return orig_offset def _get_pinned_base_class(self): return MemUnion @@ -1019,7 +1080,7 @@ class Str(Type): class Void(Type): """Represents the C void type. - + Mapped to MemVoid. """ @@ -1035,7 +1096,7 @@ class Void(Type): class Self(Void): """Special marker to reference a type inside itself. - + Mapped to MemSelf. Example: @@ -1375,7 +1436,7 @@ class MemStruct(MemType): @classmethod def _gen_attributes(cls): # Generate self.<name>
getter and setters - for name, field in cls._type.fields: + for name, field, _ in cls._type.all_fields: setattr(cls, name, property( lambda self, name=name: self.get_field(name), lambda self, val, name=name: self.set_field(name, val) diff --git a/test/core/types.py b/test/core/types.py index c59a68d6..7ad8ad13 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -491,6 +491,33 @@ assert Array(Num("B"), 19).lval != Array(Num("B"), 20).lval assert MyStruct == Struct(MyStruct.__name__, MyStruct.fields).lval assert MyStruct.get_type() == Struct(MyStruct.__name__, MyStruct.fields) +# Anonymous Unions +class Anon(MemStruct): + fields = [ + ("a", Num("B")), + # If a field name evaluates to False ("" or None for example) and the + # field type is a Struct subclass (Struct, Union, BitField), the field + # is considered as an anonymous struct or union. Therefore, Anon will + # have b1, b2 and c1, c2 attributes in that case. + ("", Union([("b1", Num("B")), ("b2", Num("H"))])), + ("", Struct("", [("c1", Num("B")), ("c2", Num("B"))])), + ("d", Num("B")), + ] + +anon = Anon(jitter.vm) +anon.memset() +anon.a = 0x07 +anon.b2 = 0x0201 +anon.c1 = 0x55 +anon.c2 = 0x77 +anon.d = 0x33 +assert anon.a == 0x07 +assert anon.b1 == 0x01 +assert anon.b2 == 0x0201 +assert anon.c1 == 0x55 +assert anon.c2 == 0x77 +assert anon.d == 0x33 + # Repr tests -- cgit 1.4.1 From e6ec6f9d800584234301733340b171092aac3f9a Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 21 Dec 2015 11:29:37 +0100 Subject: Types: typo, MemStruct.get_offset should be a classmethod Also added tests and MemArray.get_offset --- miasm2/core/types.py | 10 ++++++++-- test/core/types.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/miasm2/core/types.py b/miasm2/core/types.py index b6eaf0e4..bb7536e5 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -1362,9 +1362,10 @@ class MemStruct(MemType): offset = 0 return self._addr + offset - def 
get_offset(self, field_name): + @classmethod + def get_offset(cls, field_name): """Shorthand for self.get_type().get_offset(field_name).""" - return self._type.get_offset(field_name) + return cls.get_type().get_offset(field_name) def get_field(self, name): """Get a field value by name. @@ -1575,6 +1576,11 @@ class MemArray(MemType): def get_addr(self, idx=0): return self._addr + self.get_type().get_offset(idx) + @classmethod + def get_offset(cls, idx): + """Shorthand for self.get_type().get_offset(idx).""" + return cls.get_type().get_offset(idx) + def __getitem__(self, idx): return self.get_type().get_item(self._vm, self._addr, idx) diff --git a/test/core/types.py b/test/core/types.py index 7ad8ad13..bb1d5da1 100644 --- a/test/core/types.py +++ b/test/core/types.py @@ -518,6 +518,17 @@ assert anon.c1 == 0x55 assert anon.c2 == 0x77 assert anon.d == 0x33 +# get_offset +for field, off in (("a", 0), ("b1", 1), ("b2", 1), ("c1", 3), ("c2", 4), + ("d", 5)): + assert Anon.get_offset(field) == Anon.get_type().get_offset(field) + assert Anon.get_offset(field) == off + +arr_t = Array(Num("H")) +for idx, off in ((0, 0), (1, 2), (30, 60)): + assert arr_t.get_offset(idx) == arr_t.lval.get_offset(idx) + assert arr_t.get_offset(idx) == off + # Repr tests -- cgit 1.4.1