1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
|
"""This module provides classes to manipulate C structures backed by a VmMngr
object (a miasm VM virtual memory).
The main idea is to declare the fields of the structure in the class:
# FIXME: "I" => "u32"
class MyStruct(MemStruct):
fields = [
# Integer field: just struct.pack fields with one value
("num", Num("I")),
("flags", Num("B")),
# Ptr fields are Num, but they can also be dereferenced
# (self.deref_<field>). Deref can be read and set.
("other", Ptr("I", OtherStruct)),
# Ptr to a variable length String
("s", Ptr("I", MemStr)),
("i", Ptr("I", Num("I"))),
]
And access the fields:
mstruct = MyStruct(jitter.vm, addr)
mstruct.num = 3
assert mstruct.num == 3
mstruct.other = addr2
mstruct.deref_other = OtherStruct(jitter.vm, addr)
The `addr` argument can be omitted if an allocator is set, in which case the
structure will be automatically allocated in memory:
my_heap = miasm2.os_dep.common.heap()
set_allocator(my_heap)
Note that some structures (e.g. MemStr or MemArray) do not have a static size
and cannot be allocated automatically.
As you saw previously, to use this module, you just have to inherit from
MemStruct and define a list of (<field_name>, <field_definition>). Available
MemField classes are:
- Num: for number (float or int) handling
- Struct: abstraction over a simple struct pack/unpack
- Ptr: a pointer to another MemStruct instance
- Inline: include another MemStruct as a field (equivalent to having a
struct field into another struct in C)
- Array: a fixed size array of MemFields (points)
- Union: similar to `union` in C, list of MemFields at the same offset in a
structure; the union has the size of the biggest MemField
- BitField: similar to C bitfields, a list of
[(<field_name>, <number_of_bits>)]; creates fields that correspond to
certain bits of the field
A MemField always has a fixed size in memory.
Some special memory structures are already implemented; they all are subclasses
of MemStruct with a custom implementation:
- MemSelf: this class is just a special marker to reference a MemStruct
subclass inside itself. Works with Ptr and Array (e.g. Ptr(_, MemSelf)
for a pointer the same type as the class who uses this kind of field)
- MemVoid: empty MemStruct, placeholder to be casted to an implemented
MemStruct subclass
- MemStr: represents a string in memory; the encoding can be passed to the
constructor (null terminated ascii/ansi or null terminated utf16)
- MemArray: an unsized array of MemField; unsized here means that there is
no defined sized for this array, equivalent to a int* or char*-style table
in C. It cannot be allocated automatically, since it has no known size
- MemSizedArray: a sized MemArray, can be automatically allocated in memory
and allows more operations than MemArray
- mem: a function that dynamically generates a MemStruct subclass from a
MemField. This class has only one field named "value".
A MemStruct does not always have a static size (cls.sizeof()) nor a dynamic size
(self.get_size()).
"""
import logging
import struct
# Module-level logger. A dedicated stream handler is attached so that records
# are rendered as "<levelname padded to 5 chars>: <message>".
log = logging.getLogger(__name__)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
log.addHandler(console_handler)
# Only warnings and more severe records are emitted by default.
log.setLevel(logging.WARN)
# allocator is a function(vm, size) -> allocated_address
# It stays None until set_allocator() is called.
allocator = None
def set_allocator(alloc_func):
    """Set the allocator used by this module.

    Once an allocator is set, statically sized MemStructs (i.e. sizeof() is
    implemented) can be instantiated without specifying an address: the object
    is then allocated by @alloc_func in the vm.

    Args:
        alloc_func: func(VmMngr, size) -> integer_address. It is called with
            the vm and the size in bytes of the structure to allocate.
    """
    global allocator
    allocator = alloc_func
# Helpers
def indent(s, size=4):
    """Return @s with every line prefixed by @size spaces.

    Lines are delimited by '\\n'; an empty string yields just the padding.
    """
    pad = ' ' * size
    return '\n'.join(pad + line for line in s.split('\n'))
# FIXME: copied from miasm2.os_dep.common and fixed
def get_str_ansi(vm, addr, max_char=None):
    """Read a null terminated ANSI (latin1) string from a VmMngr.

    Memory is scanned one byte at a time from @addr until a null byte is found
    or @max_char bytes have been scanned; the terminator is not part of the
    result.

    Args:
        vm: VmMngr instance
        addr: address of the first character of the string
        max_char: max number of characters to get in memory (None: no limit)

    Returns:
        The raw bytes decoded as latin1.
    """
    length = 0
    while max_char is None or length < max_char:
        if vm.get_mem(addr + length, 1) == "\x00":
            break
        length += 1
    return vm.get_mem(addr, length).decode("latin1")
# TODO: get_raw_str_utf16 for length calculus
def get_str_utf16(vm, addr, max_char=None):
    """Read a (double) null terminated utf16 little endian string from a
    VmMngr. This encoding is mainly used in Windows.

    Memory is scanned two bytes at a time from @addr until a double null is
    found or @max_char bytes have been scanned.

    FIXME: the implementation does not work with codepoints that are encoded
    on more than 2 bytes in utf16.

    Args:
        vm: VmMngr instance
        addr: address of the first character of the string
        max_char: max number of bytes to get in memory (None: no limit)

    Returns:
        The raw bytes decoded as utf-16le.
    """
    length = 0
    # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it?
    while max_char is None or length < max_char:
        if vm.get_mem(addr + length, 2) == "\x00\x00":
            break
        length += 2
    return vm.get_mem(addr, length).decode('utf-16le')
def set_str_ansi(vm, addr, s):
    """Write @s followed by a null byte in a VmMngr memory.

    NOTE(review): no explicit encoding is performed here, @s is presumably
    already a raw ascii/ansi str -- confirm against callers.

    Args:
        vm: VmMngr instance
        addr: start address to serialize the string to
        s: the str to serialize
    """
    terminated = s + "\x00"
    vm.set_mem(addr, terminated)
def set_str_utf16(vm, addr, s):
    """Write @s, null terminated and encoded as utf16 little endian, in a
    VmMngr memory.

    Args:
        vm: VmMngr instance
        addr: start address to serialize the string to
        s: the string to serialize
    """
    terminated = s + '\x00'
    vm.set_mem(addr, terminated.encode('utf-16le'))
# MemField to MemStruct helper
# TODO: cache generated types
def mem(field):
    """Build a MemStruct subclass wrapping a single MemField.

    The generated class is named "Mem<repr of the field>" and has exactly one
    field, "value", of type @field. It can be accessed through self.value, or
    self.deref_value if @field is a Ptr.
    """
    type_name = "Mem%r" % field
    namespace = {'fields': [("value", field)]}
    # A new type is created on every call (no caching)
    return type(type_name, (MemStruct,), namespace)
# MemField classes
class MemField(object):
    """Base class to provide methods to set and get fields from virtual mem.

    Subclasses can either override _pack and _unpack, or get and set if data
    serialization requires more work (see Inline implementation for an
    example). Subclasses must also implement size().
    """

    # MemStruct subclass to substitute to the MemSelf marker, if any
    _self_type = None

    def _pack(self, val):
        """Serializes the python value @val to a raw str"""
        raise NotImplementedError()

    def _unpack(self, raw_str):
        """Deserializes a raw str to an object representing the python value
        of this field.
        """
        raise NotImplementedError()

    def set(self, vm, addr, val):
        """Set a VmMngr memory from a value.

        Args:
            vm: VmMngr instance
            addr: the start address in memory to set
            val: the python value to serialize in @vm at @addr
        """
        raw = self._pack(val)
        vm.set_mem(addr, raw)

    def get(self, vm, addr):
        """Get the python value of a field from a VmMngr memory at @addr."""
        raw = vm.get_mem(addr, self.size())
        return self._unpack(raw)

    def _get_self_type(self):
        """Return the type set with _set_self_type (None if unset)."""
        return self._self_type

    def _set_self_type(self, self_type):
        """If this field refers to MemSelf, replace it with @self_type (a
        MemStruct subclass) when using it. Generally not used outside the lib.
        """
        self._self_type = self_type

    def size(self):
        """Return the size in bytes of the serialized version of this field"""
        raise NotImplementedError()

    def __len__(self):
        return self.size()

    def __ne__(self, other):
        # Python looks up __ne__ for the != operator; the historical __neq__
        # name was never called by the interpreter.
        return not self == other

    # Kept for backward compatibility with explicit callers of the old name
    __neq__ = __ne__
class Struct(MemField):
    """Thin field wrapper around struct.pack/unpack, mainly here to factorize
    code.

    The python value of this field is a tuple matching the struct format @fmt
    given to the constructor.
    """

    def __init__(self, fmt):
        self._fmt = fmt

    def _pack(self, fields):
        """Serialize the @fields sequence with the struct format."""
        return struct.pack(self._fmt, *fields)

    def _unpack(self, raw_str):
        """Deserialize @raw_str to a tuple with the struct format."""
        return struct.unpack(self._fmt, raw_str)

    def size(self):
        """Size in bytes of the struct format."""
        return struct.calcsize(self._fmt)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s(%s)" % (cls_name, self._fmt)

    def __eq__(self, other):
        # Same concrete class and same format string
        if not self.__class__ == other.__class__:
            return False
        return self._fmt == other._fmt

    def __hash__(self):
        combined = hash(self.__class__) + hash(self._fmt)
        return hash(combined)
class Num(Struct):
    """A single number (integer or float), encoded with a struct-style format
    that must describe exactly one value.

    TODO: use u32, i16, etc. for format.
    """

    def _pack(self, number):
        """Serialize @number by wrapping it in a one-element sequence."""
        as_sequence = [number]
        return super(Num, self)._pack(as_sequence)

    def _unpack(self, raw_str):
        """Deserialize @raw_str and return the only unpacked value.

        Raises:
            ValueError: if the format unpacks to more than one value
        """
        values = super(Num, self)._unpack(raw_str)
        if len(values) > 1:
            raise ValueError("Num format string unpacks to multiple values, "
                             "should be 1")
        return values[0]
class Ptr(Num):
    """Special case of number of which value indicates the address of a
    MemStruct. Provides deref_<field> as well as <field> when used, to set and
    get the pointed MemStruct.
    """

    def __init__(self, fmt, dst_type, *type_args, **type_kwargs):
        """Args:
            fmt: (str) Num compatible format that will be the Ptr
                representation in memory
            dst_type: (MemStruct or MemField) the MemStruct this Ptr points to.
                If a MemField is given, it is transformed into a MemStruct with
                mem(TheMemField).
            *type_args, **type_kwargs: arguments to pass to the pointed
                MemStruct when instantiating it (e.g. for MemStr encoding or
                MemArray field_type).

        Raises:
            ValueError: if @dst_type is neither a MemField instance, a
                MemStruct subclass nor the MemSelf marker
        """
        if not isinstance(dst_type, MemField) and\
                not (isinstance(dst_type, type) and\
                     issubclass(dst_type, MemStruct)) and\
                not dst_type == MemSelf:
            raise ValueError("dst_type of Ptr must be a MemStruct type, a "
                             "MemField instance, the MemSelf marker or a class "
                             "name.")
        super(Ptr, self).__init__(fmt)
        if isinstance(dst_type, MemField):
            # Patch the field to propagate the MemSelf replacement
            dst_type._get_self_type = lambda: self._get_self_type()
            dst_type = mem(dst_type)
        self._dst_type = dst_type
        self._type_args = type_args
        self._type_kwargs = type_kwargs

    def _fix_dst_type(self):
        """Replace the MemSelf marker by the actual self type, if needed.

        Raises:
            ValueError: if the marker is used but no self type has been set
        """
        if self._dst_type == MemSelf:
            if self._get_self_type() is not None:
                self._dst_type = self._get_self_type()
            else:
                raise ValueError("Unsupported usecase for MemSelf, sorry")

    @property
    def dst_type(self):
        """Return the type (MemStruct subtype) this Ptr points to."""
        self._fix_dst_type()
        return self._dst_type

    def deref_get(self, vm, addr):
        """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type.
        Equivalent to a pointer dereference rvalue in C.
        """
        return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs)

    def deref_set(self, vm, addr, val):
        """Serializes the @val MemStruct subclass instance in @vm (VmMngr) at
        @addr. Equivalent to a pointer dereference assignment in C.

        A warning is logged if @val is not an instance of exactly
        self.dst_type; the value is written anyway.
        """
        # Sanity check
        if self.dst_type != val.__class__:
            log.warning("Original type was %s, overriden by value of type %s",
                        self._dst_type.__name__, val.__class__.__name__)

        # Actual job
        vm.set_mem(addr, str(val))

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._dst_type)

    def __eq__(self, other):
        return super(Ptr, self).__eq__(other) and \
            self.dst_type == other.dst_type and \
            self._type_args == other._type_args and \
            self._type_kwargs == other._type_kwargs

    def __hash__(self):
        # _type_kwargs is a dict, which is not hashable: hash a frozenset of
        # its items instead (order independent).
        kwargs_hash = hash(frozenset(self._type_kwargs.items()))
        return hash(super(Ptr, self).__hash__() + hash(self._dst_type) +
                    hash(self._type_args) + kwargs_hash)
class Inline(MemField):
    """Field used to inline a MemStruct in another MemStruct. Equivalent to
    having a struct field in a C struct.

    Concretely:

        class MyStructClass(MemStruct):
            fields = [("f1", Num("I")), ("f2", Num("I"))]

        class Example(MemStruct):
            fields = [("mystruct", Inline(MyStructClass))]

        ex = Example(vm, addr)
        ex.mystruct.f2 = 3 # inlined structure field access
        ex.mystruct = MyStructClass(vm, addr2) # struct copy

    It can be seen like a bridge to use a MemStruct as a MemField

    TODO: make the Inline implicit when setting a field to be a MemStruct
    """

    def __init__(self, inlined_type, *type_args, **type_kwargs):
        """Args:
            inlined_type: the MemStruct subclass to inline
            *type_args, **type_kwargs: extra arguments passed to
                @inlined_type when instantiating it in get()

        Raises:
            ValueError: if @inlined_type is not a MemStruct subclass
        """
        # isinstance guard: issubclass raises TypeError on non-class objects
        if not (isinstance(inlined_type, type) and
                issubclass(inlined_type, MemStruct)):
            raise ValueError("inlined type of Inline must be a MemStruct")
        self._il_type = inlined_type
        self._type_args = type_args
        self._type_kwargs = type_kwargs

    def set(self, vm, addr, val):
        """Copy the raw representation (str(@val)) of @val at @addr."""
        raw = str(val)
        vm.set_mem(addr, raw)

    def get(self, vm, addr):
        """Return an instance of the inlined type located at @addr."""
        # Forward the constructor arguments, as Ptr.deref_get does
        return self._il_type(vm, addr, *self._type_args, **self._type_kwargs)

    def size(self):
        """Static size (sizeof) of the inlined type."""
        return self._il_type.sizeof()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._il_type)

    def __eq__(self, other):
        return self.__class__ == other.__class__ and \
            self._il_type == other._il_type and \
            self._type_args == other._type_args and \
            self._type_kwargs == other._type_kwargs

    def __hash__(self):
        # _type_kwargs is a dict, which is not hashable: hash a frozenset of
        # its items instead (order independent).
        kwargs_hash = hash(frozenset(self._type_kwargs.items()))
        return hash(hash(self.__class__) + hash(self._il_type) +
                    hash(self._type_args) + kwargs_hash)
class Array(MemField):
    """Fixed size array: a contiguous sequence of @array_len elements of the
    same MemField type, like char[10] in C.

    Reading an array field gives a MemSizedArray. Writing it accepts either a
    list or a MemSizedArray instance. Examples of syntax:

        class Example(MemStruct):
            fields = [("array", Array(Num("B"), 4))]

        mystruct = Example(vm, addr)
        mystruct.array[3] = 27
        mystruct.array = [1, 4, 8, 9]
        mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4)
    """

    def __init__(self, field_type, array_len):
        self.field_type = field_type
        self.array_len = array_len

    def _set_self_type(self, self_type):
        """Propagate the MemSelf replacement to the element type as well."""
        super(Array, self)._set_self_type(self_type)
        self.field_type._set_self_type(self_type)

    def set(self, vm, addr, val):
        """Write @val (a MemSizedArray or a list) in @vm at @addr.

        Raises:
            ValueError: if @val does not have the length/size of this array
            NotImplementedError: if @val is neither a list nor a MemSizedArray
        """
        if isinstance(val, MemSizedArray):
            # Raw copy of another sized array of the same dimensions
            if val.array_len != self.array_len or len(val) != self.size():
                raise ValueError("Size mismatch in MemSizedArray assignment")
            vm.set_mem(addr, str(val))
            return

        if not isinstance(val, list):
            raise NotImplementedError(
                "Assignment only implemented for list and MemSizedArray")

        # Element by element serialization of a python list
        if len(val) != self.array_len:
            raise ValueError("Size mismatch in MemSizedArray assignment ")
        cursor = addr
        for item in val:
            self.field_type.set(vm, cursor, item)
            cursor += self.field_type.size()

    def get(self, vm, addr):
        """Return a MemSizedArray view on the array located at @addr."""
        return MemSizedArray(vm, addr, self.field_type, self.array_len)

    def size(self):
        """Size in bytes: element size times number of elements."""
        return self.field_type.size() * self.array_len

    def __repr__(self):
        return "%r[%s]" % (self.field_type, self.array_len)

    def __eq__(self, other):
        if not self.__class__ == other.__class__:
            return False
        return self.field_type == other.field_type and \
            self.array_len == other.array_len

    def __hash__(self):
        combined = (hash(self.__class__) + hash(self.field_type) +
                    hash(self.array_len))
        return hash(combined)
class Union(MemField):
    """Allows to put multiple fields at the same offset in a MemStruct, similar
    to unions in C. The Union will have the size of the largest of its fields.

    Example:

        class Example(MemStruct):
            fields = [("uni", Union([
                                  ("f1", Num("<B")),
                                  ("f2", Num("<H"))
                              ])
                     )]

        ex = Example(vm, addr)
        ex.f2 = 0x1234
        assert ex.f1 == 0x34
        assert ex.uni == '\x34\x12'
        assert ex.get_addr("f1") == ex.get_addr("f2")
    """

    def __init__(self, field_list):
        """field_list is a [(name, field)] list, see the class doc"""
        self.field_list = field_list

    def size(self):
        """Size in bytes of the largest field of the union."""
        return max(field.size() for _, field in self.field_list)

    def set(self, vm, addr, val):
        """Write the raw str @val, which must have the Union's size, at @addr.

        Raises:
            ValueError: if @val is not a str of exactly self.size() bytes
        """
        if not isinstance(val, str) or len(val) != self.size():
            raise ValueError("Union can only be set with raw str of the Union's"
                             " size")
        vm.set_mem(addr, val)

    def get(self, vm, addr):
        """Return the raw content (self.size() bytes) of the union at @addr."""
        return vm.get_mem(addr, self.size())

    def __repr__(self):
        fields_repr = ', '.join("%s: %r" % (name, field)
                                for name, field in self.field_list)
        return "%s(%s)" % (self.__class__.__name__, fields_repr)

    def __eq__(self, other):
        return self.__class__ == other.__class__ and \
            self.field_list == other.field_list

    def __hash__(self):
        # field_list is a list (unhashable): hash an equivalent tuple of
        # (name, field) tuples instead.
        hashable_fields = tuple((name, field)
                                for name, field in self.field_list)
        return hash(hash(self.__class__) + hash(hashable_fields))
class Bits(MemField):
    """Helper for BitField, of little use alone: a view on some bits of a Num.

    The @backing_num tells how the whole number is serialized/deserialized in
    the vm; reading or writing this field only touches the bits from
    @bit_offset to @bit_offset + @bits. Masking and shifting are done here, so
    that the bits can be read and written transparently.
    """

    def __init__(self, backing_num, bits, bit_offset):
        if not isinstance(backing_num, Num):
            raise ValueError("backing_num should be a Num instance")
        self._num = backing_num
        self._bits = bits
        self._bit_offset = bit_offset

    def set(self, vm, addr, val):
        """Write the low self.bit_size bits of @val into the backing number at
        @addr, leaving its other bits untouched (read-modify-write).
        """
        field_mask = (1 << self._bits) - 1
        shifted = (val & field_mask) << self._bit_offset
        width = self._num.size() * 8
        # Mask selecting every bit of the backing number but ours
        keep_mask = ~(field_mask << self._bit_offset) & ((1 << width) - 1)
        current = self._num.get(vm, addr)
        self._num.set(vm, addr, (current & keep_mask) | shifted)

    def get(self, vm, addr):
        """Read the backing number at @addr and extract this field's bits."""
        field_mask = (1 << self._bits) - 1
        whole = self._num.get(vm, addr)
        return (whole >> self._bit_offset) & field_mask

    def size(self):
        """Size in bytes of the backing number."""
        return self._num.size()

    @property
    def bit_size(self):
        """Number of bits read/written by this class"""
        return self._bits

    @property
    def bit_offset(self):
        """Offset in bits (beginning at 0, the LSB) from which to read/write
        bits.
        """
        return self._bit_offset

    def __repr__(self):
        first_bit = self._bit_offset
        end_bit = self._bit_offset + self._bits
        return "%s%r(%d:%d)" % (self.__class__.__name__, self._num,
                                first_bit, end_bit)

    def __eq__(self, other):
        if not self.__class__ == other.__class__:
            return False
        return self._num == other._num and self._bits == other._bits and \
            self._bit_offset == other._bit_offset

    def __hash__(self):
        combined = (hash(self.__class__) + hash(self._num) +
                    hash(self._bits) + hash(self._bit_offset))
        return hash(combined)
class BitField(Union):
    """A C-like bitfield.

    Constructed with a list [(<field_name>, <number_of_bits>)] and a
    @backing_num. The @backing_num is a Num instance that determines the total
    size of the bitfield and the way the bits are serialized/deserialized (big
    endian int, little endian short...). Can be seen (and implemented) as a
    Union of Bits fields.

    Creates fields that allow to access the bitfield fields easily. Example:

        class Example(MemStruct):
            fields = [("bf", BitField(Num("B"), [
                                ("f1", 2),
                                ("f2", 4),
                                ("f3", 1)
                             ])
                     )]

        ex = Example(vm, addr)
        ex.memset()
        ex.f2 = 2
        ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated
        assert ex.f1 == 3
        assert ex.f2 == 2
        assert ex.f3 == 0 # previously memset()
        assert ex.bf == 3 + (2 << 2)
    """

    def __init__(self, backing_num, bit_list):
        """@backing num: Num instance, @bit_list: [(name, n_bits)]

        Raises:
            ValueError: if the bit lengths do not fit in @backing_num
        """
        self._num = backing_num
        fields = []
        offset = 0
        # Fields are laid out from the LSB, each one right after the previous
        for name, bits in bit_list:
            fields.append((name, Bits(self._num, bits, offset)))
            offset += bits
        if offset > self._num.size() * 8:
            raise ValueError("sum of bit lengths is > to the backing num size")
        super(BitField, self).__init__(fields)

    def size(self):
        """Size in bytes of the backing number.

        Every Bits sub-field has this size, so this equals Union.size() but
        also works when the bit list is empty.
        """
        return self._num.size()

    def set(self, vm, addr, val):
        """Write the whole backing number @val at @addr."""
        self._num.set(vm, addr, val)

    def get(self, vm, addr):
        """Read the whole backing number at @addr."""
        return self._num.get(vm, addr)

    def __eq__(self, other):
        return self.__class__ == other.__class__ and \
            self._num == other._num and super(BitField, self).__eq__(other)

    def __hash__(self):
        # field_list is a list (unhashable): hash an equivalent tuple
        hashable_fields = tuple((name, field)
                                for name, field in self.field_list)
        return hash(hash(self.__class__) + hash(hashable_fields) +
                    hash(self._num))
# MemStruct classes
class _MetaMemStruct(type):
"""MemStruct metaclass. Triggers the magic that generates the class fields
from the cls.fields list.
Just calls MemStruct.gen_fields(), the actual implementation can seen be
there.
"""
def __init__(cls, name, bases, dct):
super(_MetaMemStruct, cls).__init__(name, bases, dct)
cls.gen_fields()
def __repr__(cls):
return cls.__name__
class MemStruct(object):
    """Base class to implement VmMngr backed C-like structures in miasm.

    The mechanism is the following:
        - set a "fields" class field to be a list of
          (<field_name (str)>, <MemField_subclass_instance>)
        - instances of this class will have properties to interact with these
          fields.

    Example:
        class Example(MemStruct):
            fields = [
                # Number field: just struct.pack fields with one value
                ("num", Num("I")),
                ("flags", Num("B")),
                # Ptr fields are Num, but they can also be dereferenced
                # (self.deref_<field>). Deref can be read and set.
                ("other", Ptr("I", OtherStruct)),
                ("i", Ptr("I", Num("I"))),
                # Ptr to a variable length String
                ("s", Ptr("I", MemStr)),
            ]

        mstruct = Example(vm, addr)

        # Field assignment modifies virtual memory
        mstruct.num = 3
        assert mstruct.num == 3
        memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(),
                                                      4))[0]
        assert memval == mstruct.num

        # Memset sets the whole structure
        mstruct.memset()
        assert mstruct.num == 0
        mstruct.memset('\x11')
        assert mstruct.num == 0x11111111

        other = OtherStruct(vm, addr2)
        mstruct.other = other.get_addr()
        assert mstruct.other == other.get_addr()
        assert mstruct.deref_other == other
        assert mstruct.deref_other.foo == 0x1234

    See the various MemField doc for more information.
    """

    __metaclass__ = _MetaMemStruct

    # List of (<field_name>, <MemField instance>), see the class doc
    fields = []

    # Static size in bytes; computed by gen_fields()
    _size = None

    # Classic usage methods

    def __init__(self, vm, addr=None, *args, **kwargs):
        """Args:
            vm: VmMngr instance backing this structure
            addr: address of the structure in @vm. If None, the structure is
                allocated with the module allocator (see set_allocator).

        Raises:
            ValueError: if @addr is None and no allocator is set
        """
        global allocator
        super(MemStruct, self).__init__(*args, **kwargs)
        self._vm = vm
        if addr is None:
            if allocator is None:
                raise ValueError("Cannot provide None address to MemStruct() "
                                 "if %s.allocator is not set." % __name__)
            self._addr = allocator(vm, self.get_size())
        else:
            self._addr = addr

    def get_addr(self, field_name=None):
        """Return the address of this MemStruct or one of its fields.

        Args:
            field_name: (str, optional) the name of the field to get the
                address of

        Raises:
            ValueError: if @field_name is not a field of this structure
        """
        if field_name is not None:
            if field_name not in self._attrs:
                raise ValueError("This structure has no %s field" % field_name)
            offset = self._attrs[field_name]['offset']
        else:
            offset = 0
        return self._addr + offset

    @classmethod
    def sizeof(cls):
        """Return the static size of this structure, when available (it is the
        case by default).
        """
        # Child classes can set cls._size if their size is not the sum of
        # their fields
        if cls._size is None:
            return sum(a["field"].size() for a in cls._attrs.itervalues())
        return cls._size

    def get_size(self):
        """Return the dynamic size of this structure (e.g. the size of an
        instance). Defaults to sizeof for this base class.

        For example, MemSizedArray defines get_size but not sizeof, as an
        instance has a fixed size (because it has a fixed length and
        field_type), but all the instance do not have the same size.
        """
        return self.sizeof()

    def get_field_type(self, name):
        """Return the MemField subclass instance describing field @name."""
        return self._attrs[name]['field']

    def get_field(self, name):
        """Get a field value by name.

        Useless most of the time since fields are accessible via self.<name>.

        Raises:
            AttributeError: if @name is not a field of this structure
        """
        if name not in self._attrs:
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (self.__class__.__name__, name))
        field = self._attrs[name]["field"]
        offset = self._attrs[name]["offset"]
        return field.get(self._vm, self.get_addr() + offset)

    def set_field(self, name, val):
        """Set a field value by name. @val is the python value corresponding to
        this field type.

        Useless most of the time since fields are accessible via self.<name>.

        Raises:
            AttributeError: if @name is not a field of this structure
        """
        if name not in self._attrs:
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (self.__class__.__name__, name))
        field = self._attrs[name]["field"]
        offset = self._attrs[name]["offset"]
        field.set(self._vm, self.get_addr() + offset, val)

    def deref_field(self, name):
        """Get the MemStruct pointed by <name> field.

        Useless most of the time since fields are accessible via
        self.deref_<name>.
        """
        addr = self.get_field(name)
        field = self._attrs[name]["field"]
        assert isinstance(field, Ptr),\
            "Programming error: field should be a Ptr"
        return field.deref_get(self._vm, addr)

    def set_deref_field(self, name, val):
        """Set the MemStruct pointed by <name> field. @val should be of the
        type of the pointed MemStruct. The field must be a Ptr.

        Useless most of the time since fields are accessible via
        self.deref_<name>.
        """
        addr = self.get_field(name)
        field = self._attrs[name]["field"]
        assert isinstance(field, Ptr),\
            "Programming error: field should be a Ptr"
        field.deref_set(self._vm, addr, val)

    def memset(self, byte='\x00'):
        """Fill the memory space of this MemStruct with @byte ('\x00' by
        default). The size is retrieved with self.get_size() (dynamic size).

        Raises:
            ValueError: if @byte is not a str of length 1
        """
        if not isinstance(byte, str) or not len(byte) == 1:
            raise ValueError("byte must be a 1-lengthed str")
        self._vm.set_mem(self.get_addr(), byte * self.get_size())

    def cast(self, other_type, *type_args, **type_kwargs):
        """Cast this MemStruct to another MemStruct (same address, same vm, but
        different type). Return the casted MemStruct.
        """
        return self.cast_field(None, other_type, *type_args, **type_kwargs)

    def cast_field(self, field_name, other_type, *type_args, **type_kwargs):
        """Same as cast, but the address of the returned MemStruct is the
        address at which @field_name is in the current MemStruct.
        """
        return other_type(self._vm, self.get_addr(field_name),
                          *type_args, **type_kwargs)

    def __len__(self):
        return self.get_size()

    def raw(self):
        """Raw binary (str) representation of the MemStruct as it is in
        memory.
        """
        attrs = sorted(self._attrs.itervalues(), key=lambda a: a["offset"])
        out = []
        # Offset (relative to the structure) up to which memory has been read.
        # Union/BitField sub-fields live at the same offset as their parent:
        # tracking it avoids emitting the same bytes several times.
        cursor = 0
        for attr in attrs:
            offset = attr["offset"]
            end = offset + attr["field"].size()
            if end <= cursor:
                continue
            start = max(cursor, offset)
            out.append(self._vm.get_mem(self.get_addr() + start, end - start))
            cursor = end
        return ''.join(out)

    def __str__(self):
        return self.raw()

    def __repr__(self):
        attrs = sorted(self._attrs.iteritems(), key=lambda a: a[1]["offset"])
        out = []
        for name, attr in attrs:
            field = attr["field"]
            val_repr = repr(self.get_field(name))
            # Multi-line values are displayed on their own indented lines
            if '\n' in val_repr:
                val_repr = '\n' + indent(val_repr, 4)
            out.append("%s: %r = %s" % (name, field, val_repr))
        return '%r:\n' % self.__class__ + indent('\n'.join(out), 2)

    def __eq__(self, other):
        # Do not test class equality, because of dynamically generated fields
        # self.__class__ == other.__class__ and
        # Could test attrs?
        # TODO: self._attrs == other._attrs and
        return str(self) == str(other)

    def __ne__(self, other):
        return not self == other

    # Field generation methods, voluntarily public to be able to regen fields
    # after class definition

    @classmethod
    def gen_fields(cls, fields=None):
        """Generate the fields of this class (so that they can be accessed with
        self.<field_name>) from a @fields list, as described in the class doc.

        If @fields is None, cls.fields is used; otherwise @fields is used and
        stored as cls.fields.

        Useful in case of a type cyclic dependency. For example, the following
        is not possible in python:

            class A(MemStruct):
                fields = [("b", Ptr("I", B))]

            class B(MemStruct):
                fields = [("a", Ptr("I", A))]

        With gen_fields, the following is the legal equivalent:

            class A(MemStruct):
                pass

            class B(MemStruct):
                fields = [("a", Ptr("I", A))]

            A.fields = [("b", Ptr("I", B))]
            A.gen_fields()
        """
        if fields is None:
            fields = cls.fields
        else:
            # Keep the class description in sync with the generated layout
            cls.fields = fields
        cls._attrs = {}
        offset = 0
        for name, field in fields:
            # For reflexion
            field._set_self_type(cls)
            cls.gen_field(name, field, offset)
            offset += field.size()
        cls._size = offset

    @classmethod
    def gen_field(cls, name, field, offset):
        """Generate only one field

        Args:
            @name: (str) the name of the field
            @field: (MemField instance) the field type
            @offset: (int) the offset of the field in the structure
        """
        cls._gen_simple_attr(name, field, offset)
        if isinstance(field, Union):
            cls._gen_union_attr(field, offset)

    @classmethod
    def _gen_simple_attr(cls, name, field, offset):
        """Register @field at @offset and create the matching properties."""
        cls._attrs[name] = {"field": field, "offset": offset}

        # Generate self.<name> getter and setter
        setattr(cls, name, property(
            lambda self: self.get_field(name),
            lambda self, val: self.set_field(name, val)
        ))

        # Generate self.deref_<name> getter and setter if this field is a
        # Ptr
        if isinstance(field, Ptr):
            setattr(cls, "deref_%s" % name, property(
                lambda self: self.deref_field(name),
                lambda self, val: self.set_deref_field(name, val)
            ))

    @classmethod
    def _gen_union_attr(cls, union_field, offset):
        """Generate the sub-fields of @union_field, all located at @offset.

        Raises:
            ValueError: if @union_field is not a Union instance
        """
        if not isinstance(union_field, Union):
            raise ValueError("field should be an Union instance")
        for name, field in union_field.field_list:
            cls.gen_field(name, field, offset)
class MemSelf(MemStruct):
    """Marker class used to refer to the class being defined from within a
    Ptr or an Array (mostly Array of Ptr).

    Example:

        class ListNode(MemStruct):
            fields = [
                ("next", Ptr("<I", MemSelf)),
                ("data", Ptr("<I", MemVoid)),
            ]
    """
class MemVoid(MemStruct):
    """Placeholder type, e.g. for a Ptr to an undetermined type. Mostly useful
    when casted to another type; allows to implement C's "void*" pattern.
    """

    def __repr__(self):
        # Only the class name is shown
        klass = self.__class__
        return klass.__name__
# This does not use _MetaMemStruct features, impl is custom for strings,
# because they are unsized. The only memory field is self.value.
class MemStr(MemStruct):
    """String stored in memory.

    The @encoding given to the constructor selects the representation: either
    null terminated "ansi" (latin1) or (double) null terminated "utf16". Be
    aware that the utf16 implementation is a bit buggy...

    The string value can be read or written (with python str/unicode) through
    the self.value attribute; encoding and decoding are handled by the class.

    This type is dynamically sized only (get_size is implemented, not sizeof).
    """

    def __init__(self, vm, addr, encoding="ansi"):
        # TODO: encoding as lambda
        if encoding not in ("ansi", "utf16"):
            raise NotImplementedError("Only 'ansi' and 'utf16' are implemented")
        super(MemStr, self).__init__(vm, addr)
        self._enc = encoding

    @property
    def value(self):
        """Get the string value from memory"""
        enc = self._enc
        if enc == "ansi":
            return get_str_ansi(self._vm, self.get_addr())
        if enc == "utf16":
            return get_str_utf16(self._vm, self.get_addr())
        raise NotImplementedError("Only 'ansi' and 'utf16' are implemented")

    @value.setter
    def value(self, s):
        """Set the string value in memory"""
        enc = self._enc
        if enc == "ansi":
            set_str_ansi(self._vm, self.get_addr(), s)
        elif enc == "utf16":
            set_str_utf16(self._vm, self.get_addr(), s)
        else:
            raise NotImplementedError("Only 'ansi' and 'utf16' are implemented")

    def get_size(self):
        """Return the size in bytes of the string, terminator included.

        This implementation is quite unsafe: it reads the string underneath
        to determine the size, it may therefore read a lot of memory and
        provoke mem faults (analogous to strlen).
        """
        nb_chars = len(self.value)
        if self._enc == "ansi":
            # One byte per character, plus the null terminator
            return nb_chars + 1
        if self._enc == "utf16":
            # FIXME: real encoding...
            # Two bytes per character, plus the double null terminator
            return (nb_chars + 1) * 2
        raise NotImplementedError("Only 'ansi' and 'utf16' are implemented")

    def raw(self):
        """Return the get_size() bytes read from the vm at this address."""
        return self._vm.get_mem(self.get_addr(), self.get_size())

    def __repr__(self):
        return "%r(%s): %r" % (self.__class__, self._enc, self.value)
class MemArray(MemStruct):
    """An unsized array of type @field_type (a MemField subclass instance).
    This class has no static or dynamic size.

    It can be indexed for setting and getting elements, example:

        array = MemArray(vm, addr, Num("I"))
        array[2] = 5
        array[4:8] = [0, 1, 2, 3]
        print(array[20])

    If the @field_type is a Ptr, deref_get(index) and deref_set(index, item)
    can be used to dereference a field at a given index in the array.

    mem_array_type can be used to generate a type that includes the field_type.
    Such a generated type can be instantiated with only vm and addr, as are
    other MemStructs.
    """
    # Type of the items; None unless fixed at class level by a subclass
    _field_type = None

    def __init__(self, vm, addr=None, field_type=None):
        """Args:
            @vm: the vm passed on to the MemStruct constructor
            @addr: (optional) address passed on to the MemStruct constructor
            @field_type: (MemField instance) type of the items; ignored when
                the class already fixes its _field_type

        Raises:
            NotImplementedError: if no field type is available
        """
        if self._field_type is None:
            self._field_type = field_type
        if self._field_type is None:
            raise NotImplementedError(
                "Provide field_type to instanciate this class, "
                "or generate a subclass with mem_array_type.")
        super(MemArray, self).__init__(vm, addr)

    @property
    def field_type(self):
        """Return the MemField subclass instance that represents the type of
        this MemArray items.
        """
        return self._field_type

    def _normalize_idx(self, idx):
        """Hook translating a user supplied index into the index actually
        used to compute the address. Noop for this type.
        """
        return idx

    def _normalize_slice(self, slice_):
        """Return a copy of @slice_ in which the missing bounds are filled
        in: start defaults to 0, step to 1 and stop to self.get_size().
        """
        start = slice_.start if slice_.start is not None else 0
        # NOTE(review): get_size() is not defined in this class; whether an
        # open-ended slice can be resolved on an unsized array depends on
        # the inherited implementation -- confirm
        stop = slice_.stop if slice_.stop is not None else self.get_size()
        step = slice_.step if slice_.step is not None else 1
        return slice(start, stop, step)

    def _check_bounds(self, idx):
        """Validate an already normalized index.

        Raises:
            ValueError: if @idx is neither an int nor a long
            IndexError: if @idx is negative
        """
        if not isinstance(idx, int) and not isinstance(idx, long):
            raise ValueError("index must be an int or a long")
        if idx < 0:
            raise IndexError("Index %s out of bounds" % idx)

    def index2addr(self, idx):
        """Return the address corresponding to a given @idx in this MemArray.
        """
        # Normalize first and use the normalized value for the address, so
        # that the _normalize_idx hook is actually taken into account
        idx = self._normalize_idx(idx)
        self._check_bounds(idx)
        return self.get_addr() + idx * self._field_type.size()

    def __getitem__(self, idx):
        """Return the item at @idx, or the list of items for a slice."""
        if isinstance(idx, slice):
            idx = self._normalize_slice(idx)
            return [self._field_type.get(self._vm, self.index2addr(i))
                    for i in xrange(idx.start, idx.stop, idx.step)]
        return self._field_type.get(self._vm, self.index2addr(idx))

    def deref_get(self, idx):
        """If self.field_type is a Ptr, return the MemStruct self[idx] points
        to.
        """
        return self._field_type.deref_get(self._vm, self[idx])

    def __setitem__(self, idx, item):
        """Set the item at @idx to @item. For a slice, @item must be a
        sequence with exactly as many elements as the slice covers.

        Raises:
            ValueError: on slice assignment with mismatched lengths
        """
        if isinstance(idx, slice):
            idx = self._normalize_slice(idx)
            indexes = xrange(idx.start, idx.stop, idx.step)
            if len(item) != len(indexes):
                raise ValueError("Mismatched lengths in slice assignment")
            for i, val in zip(indexes, item):
                self._field_type.set(self._vm, self.index2addr(i), val)
        else:
            self._field_type.set(self._vm, self.index2addr(idx), item)

    def deref_set(self, idx, item):
        """If self.field_type is a Ptr, set the MemStruct self[idx] points
        to with @item.
        """
        self._field_type.deref_set(self._vm, self[idx], item)

    def as_mem_str(self, encoding="ansi"):
        """Shorthand for self.cast(MemStr, encoding)."""
        return self.cast(MemStr, encoding)

    @classmethod
    def sizeof(cls):
        """Always raise ValueError: this type has no static size."""
        raise ValueError("%s is unsized, which makes some operations"
                         " impossible. Use MemSizedArray instead."
                         % cls.__name__)

    def raw(self):
        """Always raise ValueError: without a size, there is no byte range
        to read.
        """
        raise ValueError("%s is unsized, which makes some operations"
                         " impossible. Use MemSizedArray instead."
                         % self.__class__.__name__)

    def __repr__(self):
        # Only the first item is read, the array has no known end
        return "[%r, ...] [%r]" % (self[0], self._field_type)
def mem_array_type(field_type):
    """Build and return a MemArray subclass whose @field_type is fixed.

    The returned class can be instantiated with only the vm and addr
    arguments, as are standard MemStructs.
    """
    type_name = 'MemArray_%r' % (field_type,)
    bases = (MemArray,)
    namespace = {'_field_type': field_type}
    return type(type_name, bases, namespace)
class MemSizedArray(MemArray):
    """A fixed size MemArray. Its additional arg represents the @array_len (in
    number of elements) of this array.

    Indexes are expressed in number of elements. Negative indexes count from
    the end of the array, and slices follow the usual python sequence
    semantics (bounds are clamped to the array length).

    This type is dynamically sized. Use mem_sized_array_type to generate a
    fixed @field_type and @array_len array which has a static size.
    """
    # Number of elements; None unless fixed at class level by a subclass
    _array_len = None

    def __init__(self, vm, addr=None, field_type=None, array_len=None):
        """Args:
            @vm: the vm passed on to the MemArray constructor
            @addr: (optional) address passed on to the MemArray constructor
            @field_type: (MemField instance) type of the items
            @array_len: (int) number of elements of the array

        Raises:
            NotImplementedError: if the field type or the length is missing
        """
        # Set the length before anything else to allow get_size() to work for
        # allocation
        if self._array_len is None:
            self._array_len = array_len
        super(MemSizedArray, self).__init__(vm, addr, field_type)
        if self._array_len is None or self._field_type is None:
            raise NotImplementedError(
                "Provide field_type and array_len to instanciate this class, "
                "or generate a subclass with mem_sized_array_type.")

    @property
    def array_len(self):
        """The length, in number of elements, of this array."""
        return self._array_len

    @classmethod
    def sizeof(cls):
        """Always raise ValueError: the length is only known per instance."""
        raise ValueError("MemSizedArray is not statically sized. Use "
                         "mem_sized_array_type to generate a type that is.")

    def get_size(self):
        """Return the size of the array, in bytes."""
        return self._array_len * self._field_type.size()

    def _normalize_idx(self, idx):
        """Translate a negative @idx into the matching index from the start
        of the array (-1 is the last element).
        """
        if idx < 0:
            return self._array_len + idx
        return idx

    def _normalize_slice(self, slice_):
        """Resolve @slice_ against the number of elements of the array.

        slice.indices implements the standard sequence rules: missing bounds
        are filled in, negative bounds are counted from the end and out of
        range bounds are clamped. Every index of the result is thus valid.
        """
        return slice(*slice_.indices(self._array_len))

    def _check_bounds(self, idx):
        """Validate an already normalized index.

        Raises:
            ValueError: if @idx is neither an int nor a long
            IndexError: if @idx is not in [0, array_len)
        """
        if not isinstance(idx, int) and not isinstance(idx, long):
            raise ValueError("index must be an int or a long")
        # Indexes are element numbers: compare to the element count, not to
        # the size in bytes
        if idx < 0 or idx >= self._array_len:
            raise IndexError("Index %s out of bounds" % idx)

    def index2addr(self, idx):
        """Return the address corresponding to a given @idx in this array.
        """
        # The normalized index must be the one used to compute the address,
        # otherwise negative indexes can never be resolved
        idx = self._normalize_idx(idx)
        self._check_bounds(idx)
        return self.get_addr() + idx * self._field_type.size()

    def __iter__(self):
        """Yield the items of the array, in order."""
        for i in xrange(self._array_len):
            yield self[i]

    def raw(self):
        """Return the get_size() bytes read from the vm at this address."""
        return self._vm.get_mem(self.get_addr(), self.get_size())

    def __repr__(self):
        item_reprs = [repr(item) for item in self]
        # Multi-line items are laid out one per line, indented
        if self.array_len > 0 and '\n' in item_reprs[0]:
            items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n'
        else:
            items = ', '.join(item_reprs)
        return "[%s] [%r; %s]" % (items, self._field_type, self._array_len)
def mem_sized_array_type(field_type, array_len):
    """Build and return a MemSizedArray subclass whose @field_type and
    @array_len are both fixed.

    The returned type has a static size and can be instantiated with only
    the vm and addr arguments, as are standard MemStructs.
    """
    def sizeof(cls):
        return cls._field_type.size() * cls._array_len

    type_name = 'MemSizedArray_%r_%s' % (field_type, array_len)
    namespace = {
        '_array_len': array_len,
        '_field_type': field_type,
        # Both values are known at class level, so is the size
        'sizeof': classmethod(sizeof),
    }
    return type(type_name, (MemSizedArray,), namespace)
|