diff options
| author | serpilliere <serpilliere@users.noreply.github.com> | 2016-01-13 13:53:37 +0100 |
|---|---|---|
| committer | serpilliere <serpilliere@users.noreply.github.com> | 2016-01-13 13:53:37 +0100 |
| commit | 8dbf8b2747bed3257bb8db5f1c01cd08a846c3f0 (patch) | |
| tree | 62efa192dd98ea78c0109e35c3c0ad7bcf9b3faa | |
| parent | 6461a40e5eaf4bf39aadfee29ac72fe9afac4f9e (diff) | |
| parent | 6fde824b01e3ef0775b7503a153048f4375a68b1 (diff) | |
| download | miasm-8dbf8b2747bed3257bb8db5f1c01cd08a846c3f0.tar.gz miasm-8dbf8b2747bed3257bb8db5f1c01cd08a846c3f0.zip | |
Merge pull request #300 from commial/speedup-disasm
Speedup disasm
| -rw-r--r-- | miasm2/core/asmbloc.py | 1 | ||||
| -rw-r--r-- | miasm2/core/bin_stream.py | 92 | ||||
| -rw-r--r-- | miasm2/core/bin_stream_ida.py | 2 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 15 |
4 files changed, 78 insertions, 32 deletions
diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py index 71e577cf..fb910b3a 100644 --- a/miasm2/core/asmbloc.py +++ b/miasm2/core/asmbloc.py @@ -375,7 +375,6 @@ def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool, off_i = offset try: - # print repr(pool_bin.getbytes(offset, 4)) instr = mnemo.dis(pool_bin, attrib, offset) except (Disasm_Exception, IOError), e: log_asmbloc.warning(e) diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py index f7b160f9..cfcdf8a5 100644 --- a/miasm2/core/bin_stream.py +++ b/miasm2/core/bin_stream.py @@ -15,10 +15,17 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # +import math class bin_stream(object): + # Cache must be initialized by entering atomic mode + _cache = None + CACHE_SIZE = 10000 + # By default, no atomic mode + _atomic_mode = False + def __init__(self, *args, **kargs): pass @@ -28,38 +35,73 @@ class bin_stream(object): def hexdump(self, offset, l): return + def enter_atomic_mode(self): + """Enter atomic mode. In this mode, read may be cached""" + assert not self._atomic_mode + self._atomic_mode = True + self._cache = {} + + def leave_atomic_mode(self): + """Leave atomic mode""" + assert self._atomic_mode + self._atomic_mode = False + self._cache = None + + def _getbytes(self, start, length): + return self.bin[start:start + length] + def getbytes(self, start, l=1): - return self.bin[start:start + l] + """Return the bytes from the bit stream + @start: starting offset (in byte) + @l: (optional) number of bytes to read + + Wrapper on _getbytes, with atomic mode handling. + """ + if self._atomic_mode: + val = self._cache.get((start,l), None) + if val is None: + val = self._getbytes(start, l) + self._cache[(start,l)] = val + else: + val = self._getbytes(start, l) + return val def getbits(self, start, n): """Return the bits from the bit stream @start: the offset in bits @n: number of bits to read """ - if not n: + # Trivial case + if n == 0: return 0 - o = 0 + + # Get initial bytes if n > self.getlen() * 8: raise IOError('not enough bits %r %r' % (n, len(self.bin) * 8)) + temp = self.getbytes(start / 8, int(math.ceil(n / 8.))) + if not temp: + raise IOError('cannot get bytes') + + # Init + start = start % 8 + out = 0 while n: - # print 'xxx', n, start - i = start / 8 - c = self.getbytes(i) - if not c: - raise IOError('cannot get bytes') - c = ord(c) - # print 'o', hex(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - # print 'm', hex(c) - l = min(r, n) - # print 'd', r-l - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o + # Get needed bits, working on maximum 8 bits at a time + cur_byte_idx = start / 8 + new_bits = ord(temp[cur_byte_idx]) + to_keep = 8 - start % 8 + new_bits &= (1 << to_keep) - 1 + cur_len = min(to_keep, n) + new_bits >>= (to_keep - cur_len) + + # Update output + out <<= cur_len + out |= new_bits + + # Update counters + n -= cur_len + start += cur_len + return out class bin_stream_str(bin_stream): @@ -71,11 +113,11 @@ class bin_stream_str(bin_stream): self.shift = shift self.l = len(input_str) - def getbytes(self, start, l=1): + def _getbytes(self, start, l=1): if start + l + self.shift > self.l: raise IOError("not enough bytes in str") - return super(bin_stream_str, self).getbytes(start + self.shift, l) + return super(bin_stream_str, self)._getbytes(start + self.shift, l) def readbs(self, l=1): if self.offset + l + self.shift > self.l: @@ -143,7 +185,7 @@ class bin_stream_container(bin_stream): self.offset += l return self.bin.get(self.offset - l, self.offset) - def getbytes(self, start, l=1): + def _getbytes(self, start, l=1): return self.bin.get(start, start + l) def __str__(self): @@ -172,7 +214,7 @@ class bin_stream_vm(bin_stream): def getlen(self): return 0xFFFFFFFFFFFFFFFF - def getbytes(self, start, l=1): + def _getbytes(self, start, l=1): try: s = self.vm.get_mem(start + self.base_offset, l) except: diff --git a/miasm2/core/bin_stream_ida.py b/miasm2/core/bin_stream_ida.py index 1610f8ca..ee3dcec3 100644 --- a/miasm2/core/bin_stream_ida.py +++ b/miasm2/core/bin_stream_ida.py @@ -11,7 +11,7 @@ class bin_stream_ida(bin_stream_str): Don't generate xrange using address computation: It can raise error on overflow 7FFFFFFF with 32 bit python """ - def getbytes(self, start, l=1): + def _getbytes(self, start, l=1): o = "" for ad in xrange(l): o += chr(Byte(ad + start - self.shift)) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index cfbc1dfb..48f7e26e 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -1020,11 +1020,7 @@ class cls_mn(object): else: todo.append((dict(fname_values), (nb, v), offset_b)) - candidates = [c for c in candidates] - - if not candidates: - raise Disasm_Exception('cannot disasm (guess) at %X' % offset) - return candidates + return [c for c in candidates] def reset_class(self): for f in self.fields_order: @@ -1093,10 +1089,16 @@ class cls_mn(object): if not isinstance(bs_o, bin_stream): bs_o = bin_stream_str(bs_o) + bs_o.enter_atomic_mode() + offset_o = offset pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis( bs_o, mode_o, offset) candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset) + if not candidates: + bs_o.leave_atomic_mode() + raise Disasm_Exception('cannot disasm (guess) at %X' % offset) + out = [] out_c = [] if hasattr(bs, 'getlen'): @@ -1180,6 +1182,9 @@ class cls_mn(object): alias = True out.append(instr) out_c.append(c) + + bs_o.leave_atomic_mode() + if not out: raise Disasm_Exception('cannot disasm at %X' % offset_o) if len(out) != 1: |