diff options
| author | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2019-02-25 11:09:54 +0100 |
|---|---|---|
| committer | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2019-03-05 16:52:49 +0100 |
| commit | 02bbb30efea4980c9d133947cbbf69fb599071ad (patch) | |
| tree | 3fea6826fcc5354840a27cb1dc99ff31eef81896 /miasm2/core | |
| parent | eab809932871f91d6f4aa770fc321af9e156e0f5 (diff) | |
| download | miasm-02bbb30efea4980c9d133947cbbf69fb599071ad.tar.gz miasm-02bbb30efea4980c9d133947cbbf69fb599071ad.zip | |
Support python2/python3
Diffstat (limited to 'miasm2/core')
| -rw-r--r-- | miasm2/core/asm_ast.py | 4 | ||||
| -rw-r--r-- | miasm2/core/asmblock.py | 73 | ||||
| -rw-r--r-- | miasm2/core/bin_stream.py | 38 | ||||
| -rw-r--r-- | miasm2/core/bin_stream_ida.py | 10 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 112 | ||||
| -rw-r--r-- | miasm2/core/ctypesmngr.py | 6 | ||||
| -rw-r--r-- | miasm2/core/graph.py | 39 | ||||
| -rw-r--r-- | miasm2/core/interval.py | 8 | ||||
| -rw-r--r-- | miasm2/core/locationdb.py | 46 | ||||
| -rw-r--r-- | miasm2/core/objc.py | 158 | ||||
| -rw-r--r-- | miasm2/core/parse_asm.py | 24 | ||||
| -rw-r--r-- | miasm2/core/sembuilder.py | 22 | ||||
| -rw-r--r-- | miasm2/core/types.py | 70 | ||||
| -rw-r--r-- | miasm2/core/utils.py | 84 |
14 files changed, 455 insertions, 239 deletions
diff --git a/miasm2/core/asm_ast.py b/miasm2/core/asm_ast.py index 3b06ce62..69ff1f9c 100644 --- a/miasm2/core/asm_ast.py +++ b/miasm2/core/asm_ast.py @@ -1,3 +1,5 @@ +from builtins import int as int_types + class AstNode(object): """ Ast node object @@ -68,7 +70,7 @@ class AstMem(AstNode): """ def __init__(self, ptr, size): assert isinstance(ptr, AstNode) - assert isinstance(size, (int, long)) + assert isinstance(size, int_types) self.ptr = ptr self.size = size diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index f6e68a0e..811cc824 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,8 +1,13 @@ #-*- coding:utf-8 -*- +from builtins import map +from builtins import range import logging import warnings from collections import namedtuple +from builtins import int as int_types + +from future.utils import viewitems, viewvalues from miasm2.expression.expression import ExprId, ExprInt, get_expr_locs from miasm2.expression.expression import LocKey @@ -22,14 +27,12 @@ log_asmblock.setLevel(logging.WARNING) def is_int(a): - return isinstance(a, int) or isinstance(a, long) or \ - isinstance(a, moduint) or isinstance(a, modint) - + return isinstance(a, (modint, moduint, int_types)) class AsmRaw(object): - def __init__(self, raw=""): + def __init__(self, raw=b""): self.raw = raw def __str__(self): @@ -41,7 +44,7 @@ class AsmRaw(object): class asm_raw(AsmRaw): - def __init__(self, raw=""): + def __init__(self, raw=b""): warnings.warn('DEPRECATION WARNING: use "AsmRaw" instead of "asm_raw"') super(asm_label, self).__init__(raw) @@ -190,7 +193,8 @@ class AsmBlock(object): for xx in self.bto: log_asmblock.debug('lbl %s', xx) c_next = set( - [x for x in self.bto if x.c_t == AsmConstraint.c_next]) + x for x in self.bto if x.c_t == AsmConstraint.c_next + ) c_to = [x for x in self.bto if x.c_t != AsmConstraint.c_next] self.bto = set([c] + c_to) new_bloc.bto = c_next @@ -223,7 +227,7 @@ class AsmBlock(object): def get_flow_instr(self): if not self.lines: return None - for i in xrange(-1, -1 - self.lines[0].delayslot - 1, -1): + for i in range(-1, -1 - self.lines[0].delayslot - 1, -1): if not 0 <= i < len(self.lines): return None l = self.lines[i] @@ -236,7 +240,7 @@ class AsmBlock(object): delayslot = self.lines[0].delayslot end_index = len(self.lines) - 1 ds_max_index = max(end_index - delayslot, 0) - for i in xrange(end_index, ds_max_index - 1, -1): + for i in range(end_index, ds_max_index - 1, -1): l = self.lines[i] if l.is_subcall(): return l @@ -280,8 +284,10 @@ class AsmBlock(object): for constraint in self.bto: dests.setdefault(constraint.loc_key, set()).add(constraint) - self.bto = set(self._filter_constraint(constraints) - for constraints in dests.itervalues()) + self.bto = set( + self._filter_constraint(constraints) + for constraints in viewvalues(dests) + ) class asm_bloc(object): @@ -324,8 +330,10 @@ class AsmBlockBad(AsmBlock): def __str__(self): error_txt = self.ERROR_TYPES.get(self._errno, self._errno) - return "\n".join([str(self.loc_key), - "\tBad block: %s" % error_txt]) + return "%s\n\tBad block: %s" % ( + self.loc_key, + error_txt + ) def addline(self, *args, **kwargs): raise RuntimeError("An AsmBlockBad cannot have line") @@ -421,7 +429,9 @@ class AsmCFG(DiGraph): """Return the number of blocks in AsmCFG""" return len(self._nodes) - blocks = property(lambda x:x._loc_key_to_block.itervalues()) + @property + def blocks(self): + return viewvalues(self._loc_key_to_block) # Manage graph with associated constraints def add_edge(self, src, dst, constraint): @@ -536,7 +546,7 @@ class AsmCFG(DiGraph): def node2lines(self, node): if self.loc_db is None: - loc_key_name = str(node) + loc_key_name = node else: loc_key_name = self.loc_db.pretty_str(node) yield self.DotCellDescription(text=loc_key_name, @@ -545,7 +555,7 @@ class AsmCFG(DiGraph): 'bgcolor': 'grey'}) block = self._loc_key_to_block.get(node, None) if block is None: - raise StopIteration + return if isinstance(block, AsmBlockBad): yield [ self.DotCellDescription( @@ -554,7 +564,7 @@ class AsmCFG(DiGraph): ), attr={}) ] - raise StopIteration + return for line in block.lines: if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, @@ -700,14 +710,20 @@ class AsmCFG(DiGraph): """ if len(self._pendings) != 0: - raise RuntimeError("Some blocks are missing: %s" % map( - str, - self._pendings.keys() - )) + raise RuntimeError( + "Some blocks are missing: %s" % list( + map( + str, + self._pendings + ) + ) + ) - next_edges = {edge: constraint - for edge, constraint in self.edges2constraint.iteritems() - if constraint == AsmConstraint.c_next} + next_edges = { + edge: constraint + for edge, constraint in viewitems(self.edges2constraint) + if constraint == AsmConstraint.c_next + } for loc_key in self._nodes: if loc_key not in self._loc_key_to_block: @@ -740,8 +756,11 @@ class AsmCFG(DiGraph): if len(instr.raw) == 0: l = 0 else: - l = instr.raw[0].size / 8 * len(instr.raw) + l = (instr.raw[0].size // 8) * len(instr.raw) elif isinstance(instr.raw, str): + data = instr.raw.encode() + l = len(data) + elif isinstance(instr.raw, bytes): data = instr.raw l = len(data) else: @@ -1148,7 +1167,7 @@ def resolve_symbol(blockChains, loc_db, dst_interval=None): if chain.pinned: continue fixed = False - for i in xrange(1, len(fixed_chains)): + for i in range(1, len(fixed_chains)): prev_chain = fixed_chains[i - 1] next_chain = fixed_chains[i] @@ -1187,7 +1206,7 @@ def assemble_block(mnemo, block, loc_db, conservative=False): if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): # Fix special AsmRaw - data = "" + data = b"" for expr in instr.raw: expr_int = fix_expr_val(expr, loc_db) data += pck[expr_int.size](expr_int.arg) @@ -1471,7 +1490,7 @@ class disasmEngine(object): # XXX TODO nul start block option if (self.dont_dis_nulstart_bloc and not cur_block.lines and - instr.b.count('\x00') == instr.l): + instr.b.count(b'\x00') == instr.l): log_asmblock.warning("reach nul instr at %X", int(off_i)) # Block is empty -> bad block cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py index af31a52c..4977e2ae 100644 --- a/miasm2/core/bin_stream.py +++ b/miasm2/core/bin_stream.py @@ -16,6 +16,9 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # +from builtins import str +from future.utils import PY3 + from miasm2.core.utils import BIG_ENDIAN, LITTLE_ENDIAN from miasm2.core.utils import upck8le, upck16le, upck32le, upck64le from miasm2.core.utils import upck8be, upck16be, upck32be, upck64be @@ -35,6 +38,11 @@ class bin_stream(object): def __repr__(self): return "<%s !!>" % self.__class__.__name__ + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + def hexdump(self, offset, l): return @@ -81,8 +89,8 @@ class bin_stream(object): # Get initial bytes if n > self.getlen() * 8: raise IOError('not enough bits %r %r' % (n, len(self.bin) * 8)) - byte_start = start / 8 - byte_stop = (start + n + 7) / 8 + byte_start = start // 8 + byte_stop = (start + n + 7) // 8 temp = self.getbytes(byte_start, byte_stop - byte_start) if not temp: raise IOError('cannot get bytes') @@ -92,8 +100,8 @@ class bin_stream(object): out = 0 while n: # Get needed bits, working on maximum 8 bits at a time - cur_byte_idx = start / 8 - new_bits = ord(temp[cur_byte_idx]) + cur_byte_idx = start // 8 + new_bits = ord(temp[cur_byte_idx:cur_byte_idx + 1]) to_keep = 8 - start % 8 new_bits &= (1 << to_keep) - 1 cur_len = min(to_keep, n) @@ -160,7 +168,7 @@ class bin_stream(object): class bin_stream_str(bin_stream): - def __init__(self, input_str="", offset=0L, base_address=0, shift=None): + def __init__(self, input_str=b"", offset=0, base_address=0, shift=None): bin_stream.__init__(self) if shift is not None: raise DeprecationWarning("use base_address instead of shift") @@ -185,9 +193,8 @@ class bin_stream_str(bin_stream): self.offset += l return self.bin[self.offset - l - self.base_address:self.offset - self.base_address] - def __str__(self): - out = self.bin[self.offset - self.base_address:] - return out + def __bytes__(self): + return self.bin[self.offset - self.base_address:] def setoffset(self, val): self.offset = val @@ -198,7 +205,7 @@ class bin_stream_str(bin_stream): class bin_stream_file(bin_stream): - def __init__(self, binary, offset=0L, base_address=0, shift=None): + def __init__(self, binary, offset=0, base_address=0, shift=None): bin_stream.__init__(self) if shift is not None: raise DeprecationWarning("use base_address instead of shift") @@ -222,8 +229,8 @@ class bin_stream_file(bin_stream): raise IOError("Negative offset") return self.bin.read(l) - def __str__(self): - return str(self.bin) + def __bytes__(self): + return self.bin.read() def getlen(self): return self.l - (self.offset - self.base_address) @@ -231,7 +238,7 @@ class bin_stream_file(bin_stream): class bin_stream_container(bin_stream): - def __init__(self, binary, offset=0L): + def __init__(self, binary, offset=0): bin_stream.__init__(self) self.bin = binary self.l = binary.virt.max_addr() @@ -257,9 +264,8 @@ class bin_stream_container(bin_stream): except ValueError: raise IOError("cannot get bytes") - def __str__(self): - out = self.bin.virt.get(self.offset, self.offset + self.l) - return out + def __bytes__(self): + return self.bin.virt.get(self.offset, self.offset + self.l) def setoffset(self, val): self.offset = val @@ -279,7 +285,7 @@ class bin_stream_elf(bin_stream_container): class bin_stream_vm(bin_stream): - def __init__(self, vm, offset=0L, base_offset=0L): + def __init__(self, vm, offset=0, base_offset=0): self.offset = offset self.base_offset = base_offset self.vm = vm diff --git a/miasm2/core/bin_stream_ida.py b/miasm2/core/bin_stream_ida.py index fcd89f9f..44cf9367 100644 --- a/miasm2/core/bin_stream_ida.py +++ b/miasm2/core/bin_stream_ida.py @@ -1,7 +1,9 @@ +from builtins import range from idc import Byte, SegEnd from idautils import Segments from idaapi import is_mapped +from miasm2.core.utils import int_to_byte from miasm2.core.bin_stream import bin_stream_str @@ -13,13 +15,13 @@ class bin_stream_ida(bin_stream_str): It can raise error on overflow 7FFFFFFF with 32 bit python """ def _getbytes(self, start, l=1): - o = "" - for ad in xrange(l): + out = [] + for ad in range(l): offset = ad + start + self.base_address if not is_mapped(offset): raise IOError("not enough bytes") - o += chr(Byte(offset)) - return o + out.append(int_to_byte(Byte(offset))) + return b''.join(out) def readbs(self, l=1): if self.offset + l > self.l: diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index c2fbd3cd..c24b693d 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -1,12 +1,17 @@ #-*- coding:utf-8 -*- +from builtins import range import re import struct import logging from collections import defaultdict + +from future.utils import viewitems, viewvalues + import pyparsing +from miasm2.core.utils import decode_hex import miasm2.expression.expression as m2_expr from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception @@ -15,6 +20,7 @@ from miasm2.core.locationdb import LocationDB from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp +from future.utils import with_metaclass log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -23,17 +29,15 @@ log.addHandler(console_handler) log.setLevel(logging.WARN) -class bitobj: +class bitobj(object): - def __init__(self, s=""): + def __init__(self, s=b""): if not s: bits = [] else: - bits = list(bin(int(str(s).encode('hex'), 16))[2:]) - bits = [int(x) for x in bits] + bits = [int(x) for x in bin(int(encode_hex(s), 16))[2:]] if len(bits) % 8: - bits = [0 for x in xrange(8 - (len(bits) % 8))] + bits - bits = ['0' for x in xrange(len(s) * 8 - len(bits))] + bits + bits = [0 for x in range(8 - (len(bits) % 8))] + bits self.bits = bits self.offset = 0 @@ -46,7 +50,7 @@ class bitobj: if n > len(self.bits) - self.offset: raise ValueError('not enough bits %r %r' % (n, len(self.bits))) b = self.bits[self.offset:self.offset + n] - b = int("".join([str(x) for x in b]), 2) + b = int("".join(str(x) for x in b), 2) self.offset += n return b @@ -55,17 +59,18 @@ class bitobj: return bits = list(bin(b)[2:]) bits = [int(x) for x in bits] - bits = [0 for x in xrange(n - len(bits))] + bits + bits = [0 for x in range(n - len(bits))] + bits self.bits += bits def tostring(self): if len(self.bits) % 8: raise ValueError( - 'num bits must be 8 bit aligned: %d' % len(self.bits)) - b = int("".join([str(x) for x in self.bits]), 2) + 'num bits must be 8 bit aligned: %d' % len(self.bits) + ) + b = int("".join(str(x) for x in self.bits), 2) b = "%X" % b - b = '0' * (len(self.bits) / 4 - len(b)) + b - b = b.decode('hex') + b = '0' * (len(self.bits) // 4 - len(b)) + b + b = decode_hex(b.encode()) return b def reset(self): @@ -113,10 +118,10 @@ class reg_info(object): class reg_info_dct(object): def __init__(self, reg_expr): - self.dct_str_inv = dict((v.name, k) for k, v in reg_expr.iteritems()) + self.dct_str_inv = dict((v.name, k) for k, v in viewitems(reg_expr)) self.dct_expr = reg_expr - self.dct_expr_inv = dict((v, k) for k, v in reg_expr.iteritems()) - reg_str = [v.name for v in reg_expr.itervalues()] + self.dct_expr_inv = dict((v, k) for k, v in viewitems(reg_expr)) + reg_str = [v.name for v in viewvalues(reg_expr)] self.parser = literal_list(reg_str).setParseAction(self.cb_parse) def cb_parse(self, tokens): @@ -412,11 +417,11 @@ def int2bin(i, l): def myror32(v, r): - return ((v & 0xFFFFFFFFL) >> r) | ((v << (32 - r)) & 0xFFFFFFFFL) + return ((v & 0xFFFFFFFF) >> r) | ((v << (32 - r)) & 0xFFFFFFFF) def myrol32(v, r): - return ((v & 0xFFFFFFFFL) >> (32 - r)) | ((v << r) & 0xFFFFFFFFL) + return ((v & 0xFFFFFFFF) >> (32 - r)) | ((v << r) & 0xFFFFFFFF) class bs(object): @@ -563,7 +568,7 @@ class bsi(object): def __hash__(self): kargs = [] - for k, v in self.kargs.items(): + for k, v in list(viewitems(self.kargs)): if isinstance(v, list): v = tuple(v) kargs.append((k, v)) @@ -595,7 +600,7 @@ class bs_name(bs_divert): def divert(self, i, candidates): out = [] for cls, _, bases, dct, fields in candidates: - for new_name, value in self.args['name'].iteritems(): + for new_name, value in viewitems(self.args['name']): nfields = fields[:] s = int2bin(value, self.args['l']) args = dict(self.args) @@ -620,7 +625,7 @@ class bs_mod_name(bs_divert): for j, v in enumerate(tab): tmp[j] = v tab = tmp - for value, new_name in tab.iteritems(): + for value, new_name in viewitems(tab): nfields = fields[:] s = int2bin(value, self.args['l']) args = dict(self.args) @@ -676,7 +681,7 @@ class m_arg(object): self.expr = e return start, stop try: - v, start, stop = self.parser.scanString(text).next() + v, start, stop = next(self.parser.scanString(text)) except StopIteration: return None, None arg = v[0] @@ -713,7 +718,7 @@ class reg_noarg(object): self.expr = e return start, stop try: - v, start, stop = self.parser.scanString(text).next() + v, start, stop = next(self.parser.scanString(text)) except StopIteration: return None, None arg = v[0] @@ -743,7 +748,7 @@ class reg_noarg(object): return v & self.fmask == self.fbits -class mn_prefix: +class mn_prefix(object): pass @@ -756,7 +761,7 @@ def swap32(v): def perm_inv(p): - o = [None for x in xrange(len(p))] + o = [None for x in range(len(p))] for i, x in enumerate(p): o[x] = i return o @@ -775,10 +780,10 @@ total_scans = 0 def branch2nodes(branch, nodes=None): if nodes is None: nodes = [] - for k, v in branch.items(): + for k, v in viewitems(branch): if not isinstance(v, dict): continue - for k2 in v.keys(): + for k2 in v: nodes.append((k, k2)) branch2nodes(v, nodes) @@ -789,7 +794,7 @@ def factor_one_bit(tree): new_keys = defaultdict(lambda: defaultdict(dict)) if len(tree) == 1: return tree - for k, v in tree.items(): + for k, v in viewitems(tree): if k == "mn": new_keys[k] = v continue @@ -806,15 +811,15 @@ def factor_one_bit(tree): if nk in new_keys[ck]: raise NotImplementedError('not fully functional') new_keys[ck][nk] = v - for k, v in new_keys.items(): + for k, v in list(viewitems(new_keys)): new_keys[k] = factor_one_bit(v) # try factor sons if len(new_keys) != 1: return new_keys - subtree = new_keys.values()[0] + subtree = next(iter(viewvalues(new_keys))) if len(subtree) != 1: return new_keys - if subtree.keys()[0] == 'mn': + if next(iter(subtree)) == 'mn': return new_keys return new_keys @@ -826,7 +831,7 @@ def factor_fields(tree): if len(tree) != 1: return tree # merge - k1, v1 = tree.items()[0] + k1, v1 = next(iter(viewitems(tree))) if k1 == "mn": return tree l1, fmask1, fbits1, fname1, flen1 = k1 @@ -839,7 +844,7 @@ def factor_fields(tree): return tree if len(v1) != 1: return tree - k2, v2 = v1.items()[0] + k2, v2 = next(iter(viewitems(v1))) if k2 == "mn": return tree l2, fmask2, fbits2, fname2, flen2 = k2 @@ -861,7 +866,7 @@ def factor_fields_all(tree): if not isinstance(tree, dict): return tree new_keys = {} - for k, v in tree.items(): + for k, v in viewitems(tree): v = factor_fields(v) new_keys[k] = factor_fields_all(v) return new_keys @@ -902,7 +907,7 @@ def add_candidate(bases, c): def getfieldby_name(fields, fname): - f = filter(lambda x: hasattr(x, 'fname') and x.fname == fname, fields) + f = [x for x in fields if hasattr(x, 'fname') and x.fname == fname] if len(f) != 1: raise ValueError('more than one field with name: %s' % fname) return f[0] @@ -1023,10 +1028,10 @@ class instruction(object): loc_key = exprloc.loc_key names = symbols.get_location_names(loc_key) # special symbols - if '$' in names: + if b'$' in names: fixed_expr[exprloc] = self.get_asm_offset(exprloc) continue - if '_' in names: + if b'_' in names: fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue arg_int = symbols.get_location_offset(loc_key) @@ -1059,8 +1064,7 @@ class instruction(object): return -class cls_mn(object): - __metaclass__ = metamn +class cls_mn(with_metaclass(metamn, object)): args_symb = [] instruction = instruction # Block's offset alignment @@ -1073,8 +1077,10 @@ class cls_mn(object): candidates = set() fname_values = pre_dis_info - todo = [(dict(fname_values), branch, offset * 8) - for branch in cls.bintree.items()] + todo = [ + (dict(fname_values), branch, offset * 8) + for branch in list(viewitems(cls.bintree)) + ] for fname_values, branch, offset_b in todo: (l, fmask, fbits, fname, flen), vals = branch @@ -1091,7 +1097,7 @@ class cls_mn(object): continue if fname is not None and not fname in fname_values: fname_values[fname] = v - for nb, v in vals.items(): + for nb, v in viewitems(vals): if 'mn' in nb: candidates.update(v) else: @@ -1128,7 +1134,7 @@ class cls_mn(object): setattr(self, f.fname, f) if hasattr(self, 'args_permut'): args = [args[self.args_permut[i]] - for i in xrange(len(self.args_permut))] + for i in range(len(self.args_permut))] to_decode.sort(key=lambda x: (x[1].order, x[0])) to_decode = [fields_order.index(f[1]) for f in to_decode] self.args = args @@ -1236,7 +1242,7 @@ class cls_mn(object): if not getok: continue - c.l = prefix_len + total_l / 8 + c.l = prefix_len + total_l // 8 for i in c.to_decode: f = c.fields_order[i] if f.is_present: @@ -1258,7 +1264,7 @@ class cls_mn(object): c_args = [a.expr for a in c.args] instr = cls.instruction(c.name, mode, c_args, additional_info=c.additional_info()) - instr.l = prefix_len + total_l / 8 + instr.l = prefix_len + total_l // 8 instr.b = cls.getbytes(bs, offset_o, instr.l) instr.offset = offset_o instr.get_info(c) @@ -1278,8 +1284,10 @@ class cls_mn(object): for i, o in enumerate(out_c): if o.alias: return out[i] - raise NotImplementedError('Multiple disas: \n' + - "\n".join([str(x) for x in out])) + raise NotImplementedError( + 'Multiple disas: \n' + + "\n".join(str(x) for x in out) + ) return out[0] @classmethod @@ -1317,7 +1325,7 @@ class cls_mn(object): continue try: total_scans += 1 - v, start, stop = p.scanString(args_str).next() + v, start, stop = next(p.scanString(args_str)) except StopIteration: v, start, stop = [None], None, None if start != 0: @@ -1396,7 +1404,7 @@ class cls_mn(object): continue # only fix args expr - for i in xrange(len(c.args)): + for i in range(len(c.args)): c.args[i].expr = args[i] v = c.value(instr.mode) @@ -1408,8 +1416,10 @@ class cls_mn(object): vals += v candidates.append((c, v)) if len(vals) == 0: - raise ValueError('cannot asm %r %r' % - (instr.name, [str(x) for x in instr.args])) + raise ValueError( + 'cannot asm %r %r' % + (instr.name, [str(x) for x in instr.args]) + ) if len(vals) != 1: log.debug('asm multiple args ret default') @@ -1571,7 +1581,7 @@ class imm_noarg(object): e, start, stop = parser_result[self.parser] else: try: - e, start, stop = self.parser.scanString(text).next() + e, start, stop = next(self.parser.scanString(text)) except StopIteration: return None, None if e == [None]: diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py index 7dafd7e1..94c96f7e 100644 --- a/miasm2/core/ctypesmngr.py +++ b/miasm2/core/ctypesmngr.py @@ -522,15 +522,15 @@ class CAstTypes(object): if isinstance(ast, c_ast.BinaryOp): left = self.ast_eval_int(ast.left) right = self.ast_eval_int(ast.right) - is_pure_int = (isinstance(left, (int, long)) and - isinstance(right, (int, long))) + is_pure_int = (isinstance(left, int) and + isinstance(right, int)) if is_pure_int: if ast.op == '*': result = left * right elif ast.op == '/': assert left % right == 0 - result = left / right + result = left // right elif ast.op == '+': result = left + right elif ast.op == '-': diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index e385b044..f585379b 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -1,4 +1,6 @@ from collections import defaultdict, namedtuple + +from future.utils import viewitems, viewvalues import re @@ -54,8 +56,9 @@ class DiGraph(object): def __eq__(self, graph): if not isinstance(graph, self.__class__): return False - return all((self._nodes == graph.nodes(), - sorted(self._edges) == sorted(graph.edges()))) + if self._nodes != graph.nodes(): + return False + return sorted(self._edges) == sorted(graph.edges()) def __ne__(self, other): return not self.__eq__(other) @@ -110,7 +113,7 @@ class DiGraph(object): def predecessors_iter(self, node): if not node in self._nodes_pred: - raise StopIteration + return for n_pred in self._nodes_pred[node]: yield n_pred @@ -119,7 +122,7 @@ class DiGraph(object): def successors_iter(self, node): if not node in self._nodes_succ: - raise StopIteration + return for n_suc in self._nodes_succ[node]: yield n_suc @@ -165,7 +168,7 @@ class DiGraph(object): if path and path[0] == src: out.append(path + [dst]) return out - + def find_path_from_src(self, src, dst, cycles_count=0, done=None): """ This function does the same as function find_path. @@ -177,7 +180,7 @@ class DiGraph(object): @done: dictionary of already processed loc_keys, it's value is number of times it was processed @out: list of paths from @src to @dst """ - + if done is None: done = {} if src == dst: @@ -229,10 +232,12 @@ class DiGraph(object): @staticmethod def _attr2str(default_attr, attr): - return ' '.join('%s="%s"' % (name, value) - for name, value in - dict(default_attr, - **attr).iteritems()) + return ' '.join( + '%s="%s"' % (name, value) + for name, value in + viewitems(dict(default_attr, + **attr)) + ) def dot(self): """Render dot graph with HTML""" @@ -277,8 +282,10 @@ class DiGraph(object): for src, dst in self.edges(): attrs = self.edge_attr(src, dst) - attrs = ' '.join('%s="%s"' % (name, value) - for name, value in attrs.iteritems()) + attrs = ' '.join( + '%s="%s"' % (name, value) + for name, value in viewitems(attrs) + ) out.append('%s -> %s' % (self.nodeid(src), self.nodeid(dst)) + '[' + attrs + '];') @@ -304,7 +311,7 @@ class DiGraph(object): def predecessors_stop_node_iter(self, node, head): if node == head: - raise StopIteration + return for next_node in self.predecessors_iter(node): yield next_node @@ -515,7 +522,7 @@ class DiGraph(object): frontier = {} for node in idoms: - if self._nodes_pred[node] >= 2: + if len(self._nodes_pred[node]) >= 2: for predecessor in self.predecessors_iter(node): runner = predecessor if runner not in idoms: @@ -894,7 +901,7 @@ class MatchGraph(DiGraph): standing for a partial solution """ # Avoid having 2 different joker for the same node - if partial_sol and candidate in partial_sol.values(): + if partial_sol and candidate in viewvalues(partial_sol): return False # Check lambda filtering @@ -1008,5 +1015,3 @@ class MatchGraph(DiGraph): MatchGraph._propagate_successors) self._propagate_sol(node, partial_sol, graph, todo, MatchGraph._propagate_predecessors) - - raise StopIteration diff --git a/miasm2/core/interval.py b/miasm2/core/interval.py index 3fde83ad..06dc546f 100644 --- a/miasm2/core/interval.py +++ b/miasm2/core/interval.py @@ -1,3 +1,5 @@ +from __future__ import print_function + INT_EQ = 0 # Equivalent INT_B_IN_A = 1 # B in A INT_A_IN_B = -1 # A in B @@ -232,16 +234,16 @@ class interval(object): import Image import ImageDraw except ImportError: - print 'cannot import python PIL imaging' + print('cannot import python PIL imaging') return img = Image.new('RGB', (img_x, img_y), (100, 100, 100)) draw = ImageDraw.Draw(img) i_min, i_max = self.hull() - print hex(i_min), hex(i_max) + print(hex(i_min), hex(i_max)) - addr2x = lambda addr: (addr - i_min) * img_x / (i_max - i_min) + addr2x = lambda addr: ((addr - i_min) * img_x) // (i_max - i_min) for a, b in self.intervals: draw.rectangle((addr2x(a), 0, addr2x(b), img_y), (200, 0, 0)) diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py index 4c5da29e..906a247a 100644 --- a/miasm2/core/locationdb.py +++ b/miasm2/core/locationdb.py @@ -1,11 +1,16 @@ import warnings +from builtins import int as int_types +from functools import reduce +from future.utils import viewitems, viewvalues + +from miasm2.core.utils import printable, force_bytes from miasm2.expression.expression import LocKey, ExprLoc from miasm2.expression.modint import moduint, modint def is_int(a): - return isinstance(a, (int, long, moduint, modint)) + return isinstance(a, (int_types, moduint, modint)) class LocationDB(object): @@ -85,6 +90,7 @@ class LocationDB(object): Return the LocKey of @name if any, None otherwise. @name: target name """ + name = force_bytes(name) return self._name_to_loc_key.get(name) def get_or_create_name_location(self, name): @@ -92,6 +98,7 @@ class LocationDB(object): Return the LocKey of @name if any, create one otherwise. @name: target name """ + name = force_bytes(name) loc_key = self._name_to_loc_key.get(name) if loc_key is not None: return loc_key @@ -100,7 +107,7 @@ class LocationDB(object): def get_offset_location(self, offset): """ Return the LocKey of @offset if any, None otherwise. - @name: target offset + @offset: target offset """ return self._offset_to_loc_key.get(offset) @@ -119,6 +126,7 @@ class LocationDB(object): Return the offset of @name if any, None otherwise. @name: target name """ + name = force_bytes(name) loc_key = self.get_name_location(name) if loc_key is None: return None @@ -129,6 +137,7 @@ class LocationDB(object): @name: str instance @loc_key: LocKey instance """ + name = force_bytes(name) assert loc_key in self._loc_keys already_existing_loc = self._name_to_loc_key.get(name) if already_existing_loc is not None and already_existing_loc != loc_key: @@ -144,6 +153,7 @@ class LocationDB(object): @loc_key: LocKey instance """ assert loc_key in self._loc_keys + name = force_bytes(name) already_existing_loc = self._name_to_loc_key.get(name) if already_existing_loc is None: raise KeyError("%r is not already associated" % name) @@ -195,13 +205,13 @@ class LocationDB(object): """Ensure internal structures are consistent with each others""" assert set(self._loc_key_to_names).issubset(self._loc_keys) assert set(self._loc_key_to_offset).issubset(self._loc_keys) - assert self._loc_key_to_offset == {v: k for k, v in self._offset_to_loc_key.iteritems()} + assert self._loc_key_to_offset == {v: k for k, v in viewitems(self._offset_to_loc_key)} assert reduce( lambda x, y:x.union(y), - self._loc_key_to_names.itervalues(), + viewvalues(self._loc_key_to_names), set(), ) == set(self._name_to_loc_key) - for name, loc_key in self._name_to_loc_key.iteritems(): + for name, loc_key in viewitems(self._name_to_loc_key): assert name in self._loc_key_to_names[loc_key] def find_free_name(self, name): @@ -211,6 +221,7 @@ class LocationDB(object): @name: string """ + name = force_bytes(name) if self.get_name_location(name) is None: return name i = 0 @@ -233,6 +244,7 @@ class LocationDB(object): LocKey may be updated and will be returned. """ + name = force_bytes(name) # Deprecation handling if is_int(name): assert offset is None or offset == name @@ -321,6 +333,14 @@ class LocationDB(object): """Return a human readable version of @loc_key, according to information available in this LocationDB instance""" names = self.get_location_names(loc_key) + new_names = set() + for name in names: + try: + name = name.decode() + except AttributeError: + pass + new_names.add(name) + names = new_names if names: return ",".join(names) offset = self.get_location_offset(loc_key) @@ -336,23 +356,25 @@ class LocationDB(object): @property def names(self): """Return all known names""" - return self._name_to_loc_key.keys() + return list(self._name_to_loc_key) @property def offsets(self): """Return all known offsets""" - return self._offset_to_loc_key.keys() + return list(self._offset_to_loc_key) def __str__(self): out = [] for loc_key in self._loc_keys: names = self.get_location_names(loc_key) offset = self.get_location_offset(loc_key) - out.append("%s: %s - %s" % ( - loc_key, - "0x%x" % offset if offset is not None else None, - ",".join(names) - )) + out.append( + "%s: %s - %s" % ( + loc_key, + "0x%x" % offset if offset is not None else None, + ",".join(printable(name) for name in names) + ) + ) return "\n".join(out) def merge(self, location_db): diff --git a/miasm2/core/objc.py b/miasm2/core/objc.py index 14352c7b..30b00682 100644 --- a/miasm2/core/objc.py +++ b/miasm2/core/objc.py @@ -5,10 +5,14 @@ C helper for Miasm: * Miasm expression to C type """ +from builtins import zip +from builtins import int as int_types import warnings from pycparser import c_parser, c_ast +from functools import total_ordering +from miasm2.core.utils import cmp_elts from miasm2.expression.expression_reduce import ExprReducer from miasm2.expression.expression import ExprInt, ExprId, ExprOp, ExprMem @@ -65,6 +69,7 @@ def objc_to_str(objc, result=None): return result +@total_ordering class ObjC(object): """Generic ObjC""" @@ -87,10 +92,13 @@ class ObjC(object): assert other.__class__ in OBJC_PRIO if OBJC_PRIO[self.__class__] != OBJC_PRIO[other.__class__]: - return cmp(OBJC_PRIO[self.__class__], OBJC_PRIO[other.__class__]) + return cmp_elts( + OBJC_PRIO[self.__class__], + OBJC_PRIO[other.__class__] + ) if self.align != other.align: - return cmp(self.align, other.align) - return cmp(self.size, other.size) + return cmp_elts(self.align, other.align) + return cmp_elts(self.size, other.size) def __hash__(self): return hash((self.__class__, self._align, self._size)) @@ -98,7 +106,18 @@ class ObjC(object): def __str__(self): return objc_to_str(self) + def __eq__(self, other): + return self.cmp_base(other) == 0 + + def __ne__(self, other): + # required Python 2.7.14 + return not self == other + + def __lt__(self, other): + return self.cmp_base(other) < 0 + +@total_ordering class ObjCDecl(ObjC): """C Declaration identified""" @@ -117,11 +136,19 @@ class ObjCDecl(ObjC): def __str__(self): return str(self.name) - def __cmp__(self, other): + def __eq__(self, other): + ret = self.cmp_base(other) + if ret: + return False + return self.name == other.name + + def __lt__(self, other): ret = self.cmp_base(other) if ret: - return ret - return cmp(self.name, other.name) + if ret < 0: + return True + return False + return self.name < other.name class ObjCInt(ObjC): @@ -133,10 +160,8 @@ class ObjCInt(ObjC): def __str__(self): return 'int' - def __cmp__(self, other): - return self.cmp_base(other) - +@total_ordering class ObjCPtr(ObjC): """C Pointer""" @@ -172,16 +197,27 @@ class ObjCPtr(ObjC): return hash((super(ObjCPtr, self).__hash__(), hash(self._objtype))) def __repr__(self): - return '<%s %r>' % (self.__class__.__name__, - self.objtype.__class__) + return '<%s %r>' % ( + self.__class__.__name__, + self.objtype.__class__ + ) + + def __eq__(self, other): + ret = self.cmp_base(other) + if ret: + return False + return self.objtype == other.objtype - def __cmp__(self, other): + def __lt__(self, other): ret = self.cmp_base(other) if ret: - return ret - return cmp(self.objtype, other.objtype) + if ret < 0: + return True + return False + return self.objtype < other.objtype +@total_ordering class ObjCArray(ObjC): """C array (test[XX])""" @@ -205,16 +241,23 @@ class ObjCArray(ObjC): def __repr__(self): return '<%r[%d]>' % (self.objtype, self.elems) - def __cmp__(self, other): + def __eq__(self, other): ret = self.cmp_base(other) if ret: - return ret - ret = cmp(self.elems, other.elems) - if ret: - return ret - return cmp(self.objtype, other.objtype) + return False + if self.objtype != other.objtype: + return False + return self.elems == other.elems + def __lt__(self, other): + ret = self.cmp_base(other) + if ret > 0: + return False + if self.objtype > other.objtype: + return False + return self.elems < other.elems +@total_ordering class ObjCStruct(ObjC): """C object for structures""" @@ -241,12 +284,22 @@ class ObjCStruct(ObjC): def __str__(self): return 'struct %s' % (self.name) - def __cmp__(self, other): + def __eq__(self, other): ret = self.cmp_base(other) if ret: - return ret - return cmp(self.name, other.name) + return False + return self.name == other.name + + def __lt__(self, other): + ret = self.cmp_base(other) + if ret: + if ret < 0: + return True + return False + return self.name < other.name + +@total_ordering class ObjCUnion(ObjC): """C object for unions""" @@ -273,11 +326,19 @@ class ObjCUnion(ObjC): def __str__(self): return 'union %s' % (self.name) - def __cmp__(self, other): + def __eq__(self, other): + ret = self.cmp_base(other) + if ret: + return False + return self.name == other.name + + def __lt__(self, other): ret = self.cmp_base(other) if ret: - return ret - return cmp(self.name, other.name) + if ret < 0: + return True + return False + return self.name < other.name class ObjCEllipsis(ObjC): """C integer""" @@ -288,10 +349,7 @@ class ObjCEllipsis(ObjC): align = property(lambda self: self._align) size = property(lambda self: self._size) - def __cmp__(self, other): - return self.cmp_base(other) - - +@total_ordering class ObjCFunc(ObjC): """C object for Functions""" @@ -311,8 +369,10 @@ class ObjCFunc(ObjC): return hash((super(ObjCFunc, self).__hash__(), hash(self._args), self._name)) def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, - self.name) + return "<%s %s>" % ( + self.__class__.__name__, + self.name + ) def __str__(self): out = [] @@ -323,11 +383,19 @@ class ObjCFunc(ObjC): out.append(" %s %s" % (name, arg)) return '\n'.join(out) - def __cmp__(self, other): + def __eq__(self, other): + ret = self.cmp_base(other) + if ret: + return False + return self.name == other.name + + def __lt__(self, other): ret = self.cmp_base(other) if ret: - return ret - return cmp(self.name, other.name) + if ret < 0: + return True + return False + return self.name < other.name OBJC_PRIO = { ObjC: 0, @@ -448,7 +516,7 @@ class CGenInt(CGen): """Int C object""" def __init__(self, integer): - assert isinstance(integer, (int, long)) + assert isinstance(integer, int_types) self._integer = integer super(CGenInt, self).__init__(ObjCInt()) @@ -898,7 +966,7 @@ class ExprToAccessC(ExprReducer): if base_type.objtype.size == 0: missing_definition(base_type.objtype) return set() - element_num = offset / (base_type.objtype.size) + element_num = offset // (base_type.objtype.size) field_offset = offset % base_type.objtype.size if element_num >= base_type.elems: return set() @@ -919,7 +987,7 @@ class ExprToAccessC(ExprReducer): elif isinstance(base_type, ObjCDecl): if self.enforce_strict_access and offset % base_type.size != 0: return set() - elem_num = offset / base_type.size + elem_num = offset // base_type.size nobj = CGenArray(cgenobj, elem_num, void_type.align, void_type.size) @@ -942,7 +1010,7 @@ class ExprToAccessC(ExprReducer): new_type = out elif isinstance(base_type, ObjCPtr): - elem_num = offset / base_type.size + elem_num = offset // base_type.size if self.enforce_strict_access and offset % base_type.size != 0: return set() nobj = CGenArray(cgenobj, elem_num, @@ -1025,7 +1093,7 @@ class ExprToAccessC(ExprReducer): target = nobj.ctype.objtype for finalcgenobj in self.cgen_access(nobj, target, 0, True, lvl): assert isinstance(finalcgenobj.ctype, ObjCPtr) - if self.enforce_strict_access and finalcgenobj.ctype.objtype.size != node.expr.size / 8: + if self.enforce_strict_access and finalcgenobj.ctype.objtype.size != node.expr.size // 8: continue found.add(CGenDeref(finalcgenobj)) @@ -1035,16 +1103,16 @@ class ExprToAccessC(ExprReducer): if isinstance(target, (ObjCStruct, ObjCUnion)): for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl): target = finalcgenobj.ctype.objtype - if self.enforce_strict_access and target.size != node.expr.size / 8: + if self.enforce_strict_access and target.size != node.expr.size // 8: continue found.add(CGenDeref(finalcgenobj)) elif isinstance(target, ObjCArray): - if self.enforce_strict_access and subcgenobj.ctype.size != node.expr.size / 8: + if self.enforce_strict_access and subcgenobj.ctype.size != node.expr.size // 8: continue found.update(self.cgen_access(CGenDeref(subcgenobj), target, 0, False, lvl)) else: - if self.enforce_strict_access and target.size != node.expr.size / 8: + if self.enforce_strict_access and target.size != node.expr.size // 8: continue found.add(CGenDeref(subcgenobj)) if not found: @@ -1504,14 +1572,14 @@ class CTypesManager(object): elif size.operator == "*": return arg0 * arg1 elif size.operator == "/": - return arg0 / arg1 + return arg0 // arg1 elif size.operator == "<<": return arg0 << arg1 elif size.operator == ">>": return arg0 >> arg1 else: raise ValueError("Unknown operator %s" % size.operator) - elif isinstance(size, (int, long)): + elif isinstance(size, int_types): return size elif isinstance(size, CTypeSizeof): obj = self._get_objc(size.target) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 7ddf838c..e9982503 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -1,5 +1,7 @@ #-*- coding:utf-8 -*- import re +import codecs +from builtins import range from miasm2.expression.expression import ExprId, ExprInt, ExprOp, LocKey import miasm2.core.asmblock as asmblock @@ -63,7 +65,7 @@ def guess_next_new_label(loc_db): """Generate a new label @loc_db: the LocationDB instance""" i = 0 - gen_name = "loc_%.8X" + gen_name = b"loc_%.8X" while True: name = gen_name % i label = loc_db.get_name_location(name) @@ -78,7 +80,7 @@ STATE_IN_BLOC = 1 def asm_ast_to_expr_with_size(arg, loc_db, size): if isinstance(arg, AstId): - return ExprId(arg.name, size) + return ExprId(arg.name.encode(), size) if isinstance(arg, AstOp): args = [asm_ast_to_expr_with_size(tmp, loc_db, size) for tmp in arg.args] return ExprOp(arg.op, *args) @@ -119,7 +121,7 @@ def parse_txt(mnemo, attrib, txt, loc_db=None): # label beginning with .L match_re = LABEL_RE.match(line) if match_re: - label_name = match_re.group(1) + label_name = match_re.group(1).encode() label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue @@ -133,18 +135,20 @@ def parse_txt(mnemo, attrib, txt, loc_db=None): # XXX HACK line = line.replace(r'\n', '\n').replace(r'\r', '\r') raw = line[line.find(r'"') + 1:line.rfind(r'"')] - raw = raw.decode('string_escape') + raw = codecs.escape_decode(raw)[0] if directive == 'string': - raw += "\x00" + raw += b"\x00" lines.append(asmblock.AsmRaw(raw)) continue if directive == 'ustring': # XXX HACK line = line.replace(r'\n', '\n').replace(r'\r', '\r') raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00" - raw = raw.decode('string_escape') - raw = "".join([string + '\x00' for string in raw]) - lines.append(asmblock.AsmRaw(raw)) + raw = codecs.escape_decode(raw)[0] + out = b'' + for i in range(len(raw)): + out += raw[i:i+1] + b'\x00' + lines.append(asmblock.AsmRaw(out)) continue if directive in declarator: data_raw = line[match_re.end():].split(' ', 1)[1] @@ -183,12 +187,12 @@ def parse_txt(mnemo, attrib, txt, loc_db=None): if directive[0:4] == 'cfi_': continue - raise ValueError("unknown directive %s" % str(directive)) + raise ValueError("unknown directive %s" % directive) # label match_re = LABEL_RE.match(line) if match_re: - label_name = match_re.group(1) + label_name = match_re.group(1).encode() label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 5694ffa3..8ea4c4ac 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -4,6 +4,8 @@ import inspect import ast import re +from future.utils import PY3 + import miasm2.expression.expression as m2_expr from miasm2.ir.ir import IRBlock, AssignBlock @@ -30,7 +32,6 @@ class MiasmTransformer(ast.NodeTransformer): # Recursive visit node = self.generic_visit(node) - if isinstance(node.func, ast.Name): # iX(Y) -> ExprInt(Y, X) fc_name = node.func.id @@ -110,6 +111,17 @@ class MiasmTransformer(ast.NodeTransformer): starargs=None, kwargs=None) +if PY3: + def get_arg_name(name): + return name.arg + def gen_arg(name, ctx): + return ast.arg(arg=name, ctx=ctx) +else: + def get_arg_name(name): + return name.id + def gen_arg(name, ctx): + return ast.Name(id=name, ctx=ctx) + class SemBuilder(object): """Helper for building instruction's semantic side effects method @@ -300,7 +312,7 @@ class SemBuilder(object): # Get the function AST parsed = ast.parse(inspect.getsource(func)) fc_ast = parsed.body[0] - argument_names = [name.id for name in fc_ast.args.args] + argument_names = [get_arg_name(name) for name in fc_ast.args.args] # Init local cache self._local_ctx = {} @@ -309,8 +321,10 @@ class SemBuilder(object): blocks, body = self._parse_body(fc_ast.body, argument_names) # Build the new function - fc_ast.args.args[0:0] = [ast.Name(id='ir', ctx=ast.Param()), - ast.Name(id='instr', ctx=ast.Param())] + fc_ast.args.args[0:0] = [ + gen_arg('ir', ast.Param()), + gen_arg('instr', ast.Param()) + ] cur_instr = blocks[0][0] if len(blocks[-1][0]) == 0: ## Last block can be empty diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 051c4cca..b915c27f 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -102,9 +102,13 @@ Note that some structures (e.g. MemStr or MemArray) do not have a static size and cannot be allocated automatically. """ +from builtins import range, zip +from builtins import int as int_types import itertools import logging import struct +from future.utils import PY3 +from future.utils import viewitems, with_metaclass log = logging.getLogger(__name__) console_handler = logging.StreamHandler() @@ -155,7 +159,7 @@ def indent(s, size=4): # String generic getter/setter/len-er # TODO: make miasm2.os_dep.common and jitter ones use these ones -def get_str(vm, addr, enc, max_char=None, end='\x00'): +def get_str(vm, addr, enc, max_char=None, end=u'\x00'): """Get a @end (by default '\\x00') terminated @enc encoded string from a VmMngr. @@ -186,7 +190,7 @@ def get_str(vm, addr, enc, max_char=None, end='\x00'): break s.append(c) i += step - return ''.join(s).decode(enc) + return b''.join(s).decode(enc) def raw_str(s, enc, end=u'\x00'): """Returns a string representing @s as an @end (by default \\x00) @@ -225,7 +229,7 @@ def raw_len(py_unic_str, enc, end=u'\x00'): """ return len(raw_str(py_unic_str, enc)) -def enc_triplet(enc, max_char=None, end='\x00'): +def enc_triplet(enc, max_char=None, end=u'\x00'): """Returns a triplet of functions (get_str_enc, set_str_enc, raw_len_enc) for a given encoding (as needed by Str to add an encoding). The prototypes are: @@ -310,8 +314,11 @@ class Type(object): Called by self.lval when it is not in cache. """ pinned_base_class = self._get_pinned_base_class() - pinned_type = type("Mem%r" % self, (pinned_base_class,), - {'_type': self}) + pinned_type = type( + "Mem%r" % self, + (pinned_base_class,), + {'_type': self} + ) return pinned_type def _get_pinned_base_class(self): @@ -344,7 +351,7 @@ class Type(object): raise NotImplementedError("Abstract method") def __ne__(self, other): - return not self.__eq__(other) + return not self == other class RawStruct(Type): @@ -373,7 +380,7 @@ class RawStruct(Type): return self.__class__ == other.__class__ and self._fmt == other._fmt def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash((self.__class__, self._fmt)) @@ -495,7 +502,7 @@ class Ptr(Num): # Actual job dst_addr = self.get_val(vm, addr) - vm.set_mem(dst_addr, str(val)) + vm.set_mem(dst_addr, bytes(val)) def _get_pinned_base_class(self): return MemPtr @@ -510,7 +517,7 @@ class Ptr(Num): self._type_kwargs == other._type_kwargs def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash((super(Ptr, self).__hash__(), self.dst_type, @@ -587,7 +594,7 @@ class Struct(Type): # But the current offset is added 'offset': fd['offset'] + offset, } - for name, fd in field._fields_desc.iteritems() + for name, fd in viewitems(field._fields_desc) } # Add the newly generated fields from the anon field @@ -629,7 +636,7 @@ class Struct(Type): return self._fields def set(self, vm, addr, val): - raw = str(val) + raw = bytes(val) vm.set_mem(addr, raw) def get(self, vm, addr): @@ -683,7 +690,7 @@ class Struct(Type): self.name == other.name def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): # Only hash name, not fields, because if a field is a Ptr to this @@ -771,7 +778,7 @@ class Array(Type): if isinstance(val, MemSizedArray): if val.array_len != self.array_len or len(val) != self.size: raise ValueError("Size mismatch in MemSizedArray assignment") - raw = str(val) + raw = bytes(val) vm.set_mem(addr, raw) # list assignment @@ -810,7 +817,7 @@ class Array(Type): if isinstance(idx, slice): res = [] idx = self._normalize_slice(idx) - for i in xrange(idx.start, idx.stop, idx.step): + for i in range(idx.start, idx.stop, idx.step): res.append(self.field_type.get(vm, addr + self.get_offset(i))) return res else: @@ -823,9 +830,9 @@ class Array(Type): """ if isinstance(idx, slice): idx = self._normalize_slice(idx) - if len(item) != len(xrange(idx.start, idx.stop, idx.step)): + if len(item) != len(range(idx.start, idx.stop, idx.step)): raise ValueError("Mismatched lengths in slice assignment") - for i, val in itertools.izip(xrange(idx.start, idx.stop, idx.step), + for i, val in zip(range(idx.start, idx.stop, idx.step), item): self.field_type.set(vm, addr + self.get_offset(i), val) else: @@ -855,7 +862,7 @@ class Array(Type): return slice(start, stop, step) def _check_bounds(self, idx): - if not isinstance(idx, (int, long)): + if not isinstance(idx, int_types): raise ValueError("index must be an int or a long") if idx < 0 or (self.is_sized() and idx >= self.size): raise IndexError("Index %s out of bounds" % idx) @@ -875,7 +882,7 @@ class Array(Type): self.array_len == other.array_len def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash((self.__class__, self.field_type, self.array_len)) @@ -942,7 +949,7 @@ class Bits(Type): self._bit_offset == other._bit_offset def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash((self.__class__, self._num, self._bits, self._bit_offset)) @@ -1002,7 +1009,7 @@ class BitField(Union): self._num == other._num and super(BitField, self).__eq__(other) def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash((super(BitField, self).__hash__(), self._num)) @@ -1117,7 +1124,7 @@ class Str(Type): return self.__class__ == other.__class__ and self._enc == other._enc def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash((self.__class__, self._enc)) @@ -1136,7 +1143,7 @@ class Void(Type): return self.__class__ == other.__class__ def __ne__(self, other): - return not self.__eq__(other) + return not self == other def __hash__(self): return hash(self.__class__) @@ -1191,7 +1198,7 @@ class _MetaMemStruct(_MetaMemType): cls.gen_fields() -class MemType(object): +class MemType(with_metaclass(_MetaMemType, object)): """Base class for classes that allow to map python objects to C types in virtual memory. Represents an lvalue of a given type. @@ -1200,7 +1207,6 @@ class MemType(object): The main exception is MemStruct, which you may want to subclass yourself for syntactic ease. """ - __metaclass__ = _MetaMemType # allocator is a function(vm, size) -> allocated_address allocator = None @@ -1278,12 +1284,12 @@ class MemType(object): """ return self.sizeof() - def memset(self, byte='\x00'): + def memset(self, byte=b'\x00'): """Fill the memory space of this MemType with @byte ('\x00' by default). The size is retrieved with self.get_size() (dynamic size). """ # TODO: multibyte patterns - if not isinstance(byte, str) or not len(byte) == 1: + if not isinstance(byte, bytes) or len(byte) != 1: raise ValueError("byte must be a 1-lengthed str") self._vm.set_mem(self.get_addr(), byte * self.get_size()) @@ -1319,6 +1325,11 @@ class MemType(object): return self.get_size() def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __bytes__(self): return self.raw() def __repr__(self): @@ -1327,7 +1338,7 @@ class MemType(object): def __eq__(self, other): return self.__class__ == other.__class__ and \ self.get_type() == other.get_type() and \ - str(self) == str(other) + bytes(self) == bytes(other) def __ne__(self, other): return not self == other @@ -1350,7 +1361,7 @@ class MemValue(MemType): return "%r: %r" % (self.__class__, self.val) -class MemStruct(MemType): +class MemStruct(with_metaclass(_MetaMemStruct, MemType)): """Base class to easily implement VmMngr backed C-like structures in miasm. Represents a structure in virtual memory. @@ -1403,7 +1414,6 @@ class MemStruct(MemType): doc for more information on how to handle recursive types and cyclic dependencies. """ - __metaclass__ = _MetaMemStruct fields = None def get_addr(self, field_name=None): @@ -1667,7 +1677,7 @@ class MemSizedArray(MemArray): return self.get_type().size def __iter__(self): - for i in xrange(self.get_type().array_len): + for i in range(self.get_type().array_len): yield self[i] def raw(self): diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py index c1f48418..9856d4f2 100644 --- a/miasm2/core/utils.py +++ b/miasm2/core/utils.py @@ -1,7 +1,15 @@ +from __future__ import print_function +from builtins import range import struct import inspect -import UserDict +from collections import MutableMapping as DictMixin + from operator import itemgetter +import codecs + +from future.utils import viewitems + +import collections upck8 = lambda x: struct.unpack('B', x)[0] upck16 = lambda x: struct.unpack('H', x)[0] @@ -62,23 +70,57 @@ class Disasm_Exception(Exception): pass +def printable(string): + if isinstance(string, bytes): + return "".join( + c.decode() if b" " <= c < b"~" else "." + for c in (string[i:i+1] for i in range(len(string))) + ) + return string + + +def force_bytes(value): + try: + return value.encode() + except AttributeError: + return value + + +def iterbytes(string): + for i in range(len(string)): + yield string[i:i+1] + + +def int_to_byte(value): + return struct.pack('B', value) + +def cmp_elts(elt1, elt2): + return (elt1 > elt2) - (elt1 < elt2) + + +_DECODE_HEX = codecs.getdecoder("hex_codec") +_ENCODE_HEX = codecs.getencoder("hex_codec") + +def decode_hex(value): + return _DECODE_HEX(value)[0] + +def encode_hex(value): + return _ENCODE_HEX(value)[0] + + def hexdump(src, length=16): - FILTER = ''.join( - [(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)]) lines = [] - for c in xrange(0, len(src), length): + for c in range(0, len(src), length): chars = src[c:c + length] - hexa = ' '.join(["%02x" % ord(x) for x in chars]) + hexa = ' '.join("%02x" % ord(x) for x in iterbytes(chars)) printable = ''.join( - ["%s" % ((ord(x) <= 127 and FILTER[ord(x)]) or '.') for x in chars]) + x.decode() if 32 <= ord(x) <= 126 else '.' for x in iterbytes(chars) + ) lines.append("%04x %-*s %s\n" % (c, length * 3, hexa, printable)) - print ''.join(lines) - -# stackoverflow.com/questions/2912231 - -import collections + print(''.join(lines)) +# stackoverflow.com/questions/2912231 class keydefaultdict(collections.defaultdict): def __missing__(self, key): @@ -88,7 +130,7 @@ class keydefaultdict(collections.defaultdict): return value -class BoundedDict(UserDict.DictMixin): +class BoundedDict(DictMixin): """Limited in size dictionary. @@ -108,7 +150,7 @@ class BoundedDict(UserDict.DictMixin): @delete_cb: (optional) callback called when an element is removed """ self._data = initialdata.copy() if initialdata else {} - self._min_size = min_size if min_size else max_size / 3 + self._min_size = min_size if min_size else max_size // 3 self._max_size = max_size self._size = len(self._data) # Do not use collections.Counter as it is quite slow @@ -122,8 +164,11 @@ class BoundedDict(UserDict.DictMixin): # Bound can only be reached on a new element if (self._size >= self._max_size): - most_common = sorted(self._counter.iteritems(), - key=itemgetter(1), reverse=True) + most_common = sorted( + viewitems(self._counter), + key=itemgetter(1), + reverse=True + ) # Handle callback if self._delete_cb is not None: @@ -154,7 +199,7 @@ class BoundedDict(UserDict.DictMixin): def keys(self): "Return the list of dict's keys" - return self._data.keys() + return list(self._data) @property def data(self): @@ -180,3 +225,10 @@ class BoundedDict(UserDict.DictMixin): if self._delete_cb: for key in self._data: self._delete_cb(key) + + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) |