about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author serpilliere <serpilliere@users.noreply.github.com> 2016-01-13 13:53:37 +0100
committer serpilliere <serpilliere@users.noreply.github.com> 2016-01-13 13:53:37 +0100
commit 8dbf8b2747bed3257bb8db5f1c01cd08a846c3f0 (patch)
tree 62efa192dd98ea78c0109e35c3c0ad7bcf9b3faa
parent 6461a40e5eaf4bf39aadfee29ac72fe9afac4f9e (diff)
parent 6fde824b01e3ef0775b7503a153048f4375a68b1 (diff)
download miasm-8dbf8b2747bed3257bb8db5f1c01cd08a846c3f0.tar.gz
miasm-8dbf8b2747bed3257bb8db5f1c01cd08a846c3f0.zip
Merge pull request #300 from commial/speedup-disasm
Speedup disasm
-rw-r--r-- miasm2/core/asmbloc.py 1
-rw-r--r-- miasm2/core/bin_stream.py 92
-rw-r--r-- miasm2/core/bin_stream_ida.py 2
-rw-r--r-- miasm2/core/cpu.py 15
4 files changed, 78 insertions, 32 deletions
diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py
index 71e577cf..fb910b3a 100644
--- a/miasm2/core/asmbloc.py
+++ b/miasm2/core/asmbloc.py
@@ -375,7 +375,6 @@ def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool,
 
         off_i = offset
         try:
-            # print repr(pool_bin.getbytes(offset, 4))
             instr = mnemo.dis(pool_bin, attrib, offset)
         except (Disasm_Exception, IOError), e:
             log_asmbloc.warning(e)
diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py
index f7b160f9..cfcdf8a5 100644
--- a/miasm2/core/bin_stream.py
+++ b/miasm2/core/bin_stream.py
@@ -15,10 +15,17 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
+import math
 
 
 class bin_stream(object):
 
+    # Cache must be initialized by entering atomic mode
+    _cache = None
+    CACHE_SIZE = 10000
+    # By default, no atomic mode
+    _atomic_mode = False
+
     def __init__(self, *args, **kargs):
         pass
 
@@ -28,38 +35,73 @@ class bin_stream(object):
     def hexdump(self, offset, l):
         return
 
+    def enter_atomic_mode(self):
+        """Enter atomic mode. In this mode, read may be cached"""
+        assert not self._atomic_mode
+        self._atomic_mode = True
+        self._cache = {}
+
+    def leave_atomic_mode(self):
+        """Leave atomic mode"""
+        assert self._atomic_mode
+        self._atomic_mode = False
+        self._cache = None
+
+    def _getbytes(self, start, length):
+        return self.bin[start:start + length]
+
     def getbytes(self, start, l=1):
-        return self.bin[start:start + l]
+        """Return the bytes from the bit stream
+        @start: starting offset (in byte)
+        @l: (optional) number of bytes to read
+
+        Wrapper on _getbytes, with atomic mode handling.
+        """
+        if self._atomic_mode:
+            val = self._cache.get((start,l), None)
+            if val is None:
+                val = self._getbytes(start, l)
+                self._cache[(start,l)] = val
+        else:
+            val = self._getbytes(start, l)
+        return val
 
     def getbits(self, start, n):
         """Return the bits from the bit stream
         @start: the offset in bits
         @n: number of bits to read
         """
-        if not n:
+        # Trivial case
+        if n == 0:
             return 0
-        o = 0
+
+        # Get initial bytes
         if n > self.getlen() * 8:
             raise IOError('not enough bits %r %r' % (n, len(self.bin) * 8))
+        temp = self.getbytes(start / 8, int(math.ceil(n / 8.)))
+        if not temp:
+            raise IOError('cannot get bytes')
+
+        # Init
+        start = start % 8
+        out = 0
         while n:
-            # print 'xxx', n, start
-            i = start / 8
-            c = self.getbytes(i)
-            if not c:
-                raise IOError('cannot get bytes')
-            c = ord(c)
-            # print 'o', hex(c)
-            r = 8 - start % 8
-            c &= (1 << r) - 1
-            # print 'm', hex(c)
-            l = min(r, n)
-            # print 'd', r-l
-            c >>= (r - l)
-            o <<= l
-            o |= c
-            n -= l
-            start += l
-        return o
+            # Get needed bits, working on maximum 8 bits at a time
+            cur_byte_idx = start / 8
+            new_bits = ord(temp[cur_byte_idx])
+            to_keep = 8 - start % 8
+            new_bits &= (1 << to_keep) - 1
+            cur_len = min(to_keep, n)
+            new_bits >>= (to_keep - cur_len)
+
+            # Update output
+            out <<= cur_len
+            out |= new_bits
+
+            # Update counters
+            n -= cur_len
+            start += cur_len
+        return out
 
 
 class bin_stream_str(bin_stream):
@@ -71,11 +113,11 @@ class bin_stream_str(bin_stream):
         self.shift = shift
         self.l = len(input_str)
 
-    def getbytes(self, start, l=1):
+    def _getbytes(self, start, l=1):
         if start + l + self.shift > self.l:
             raise IOError("not enough bytes in str")
 
-        return super(bin_stream_str, self).getbytes(start + self.shift, l)
+        return super(bin_stream_str, self)._getbytes(start + self.shift, l)
 
     def readbs(self, l=1):
         if self.offset + l + self.shift > self.l:
@@ -143,7 +185,7 @@ class bin_stream_container(bin_stream):
         self.offset += l
         return self.bin.get(self.offset - l, self.offset)
 
-    def getbytes(self, start, l=1):
+    def _getbytes(self, start, l=1):
         return self.bin.get(start, start + l)
 
     def __str__(self):
@@ -172,7 +214,7 @@ class bin_stream_vm(bin_stream):
     def getlen(self):
         return 0xFFFFFFFFFFFFFFFF
 
-    def getbytes(self, start, l=1):
+    def _getbytes(self, start, l=1):
         try:
             s = self.vm.get_mem(start + self.base_offset, l)
         except:
diff --git a/miasm2/core/bin_stream_ida.py b/miasm2/core/bin_stream_ida.py
index 1610f8ca..ee3dcec3 100644
--- a/miasm2/core/bin_stream_ida.py
+++ b/miasm2/core/bin_stream_ida.py
@@ -11,7 +11,7 @@ class bin_stream_ida(bin_stream_str):
     Don't generate xrange using address computation:
     It can raise error on overflow 7FFFFFFF with 32 bit python
     """
-    def getbytes(self, start, l=1):
+    def _getbytes(self, start, l=1):
         o = ""
         for ad in xrange(l):
             o += chr(Byte(ad + start - self.shift))
diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py
index cfbc1dfb..48f7e26e 100644
--- a/miasm2/core/cpu.py
+++ b/miasm2/core/cpu.py
@@ -1020,11 +1020,7 @@ class cls_mn(object):
                 else:
                     todo.append((dict(fname_values), (nb, v), offset_b))
 
-        candidates = [c for c in candidates]
-
-        if not candidates:
-            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)
-        return candidates
+        return [c for c in candidates]
 
     def reset_class(self):
         for f in self.fields_order:
@@ -1093,10 +1089,16 @@ class cls_mn(object):
         if not isinstance(bs_o, bin_stream):
             bs_o = bin_stream_str(bs_o)
 
+        bs_o.enter_atomic_mode()
+
         offset_o = offset
         pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
             bs_o, mode_o, offset)
         candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
+        if not candidates:
+            bs_o.leave_atomic_mode()
+            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)
+
         out = []
         out_c = []
         if hasattr(bs, 'getlen'):
@@ -1180,6 +1182,9 @@ class cls_mn(object):
                 alias = True
             out.append(instr)
             out_c.append(c)
+
+        bs_o.leave_atomic_mode()
+
         if not out:
             raise Disasm_Exception('cannot disasm at %X' % offset_o)
         if len(out) != 1: