From 4ac1d728d4418ff50330feb631bf3ea2f16429a8 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 11 Oct 2015 00:40:35 +0200 Subject: BoundedDict: faster implementation Replaced collections.Counter with a simple {key -> int} built-in dict. This allows for an up to 2 times speed up when jitting. --- miasm2/core/utils.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'miasm2') diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py index 782c21d6..b34ec971 100644 --- a/miasm2/core/utils.py +++ b/miasm2/core/utils.py @@ -1,6 +1,7 @@ import struct import inspect import UserDict +from operator import itemgetter upck8 = lambda x: struct.unpack('B', x)[0] upck16 = lambda x: struct.unpack('H', x)[0] @@ -73,7 +74,8 @@ class BoundedDict(UserDict.DictMixin): self._min_size = min_size if min_size else max_size / 3 self._max_size = max_size self._size = len(self._data) - self._counter = collections.Counter(self._data.keys()) + # Do not use collections.Counter as it is quite slow + self._counter = dict((k, 1) for k in self._data.iterkeys()) self._delete_cb = delete_cb def __setitem__(self, asked_key, value): @@ -83,31 +85,46 @@ class BoundedDict(UserDict.DictMixin): # Bound can only be reached on a new element if (self._size >= self._max_size): - most_commons = [key for key, _ in self._counter.most_common()] + most_common = sorted(self._counter.iteritems(), + key=itemgetter(1), reverse=True) # Handle callback if self._delete_cb is not None: - for key in most_commons[self._min_size - 1:]: + for key, _ in most_common[self._min_size - 1:]: self._delete_cb(key) # Keep only the most @_min_size used self._data = {key:self._data[key] - for key in most_commons[:self._min_size - 1]} + for key, _ in most_common[:self._min_size - 1]} self._size = self._min_size # Reset use's counter - self._counter = collections.Counter(self._data.keys()) + self._counter = dict((k, 1) for k in self._data.iterkeys()) + + # Avoid rechecking in dict: set to 1 here, add 1 otherwise + self._counter[asked_key] = 1 + else: + self._counter[asked_key] += 1 self._data[asked_key] = value - self._counter.update([asked_key]) + + def __contains__(self, key): + # Do not call has_key to avoid adding function call overhead + return key in self._data + + def has_key(self, key): + return key in self._data def keys(self): "Return the list of dict's keys" return self._data.keys() def __getitem__(self, key): - self._counter.update([key]) - return self._data[key] + # Retrieve data first to raise the proper exception on error + data = self._data[key] + # Should never raise, since the key is in self._data + self._counter[key] += 1 + return data def __delitem__(self, key): if self._delete_cb is not None: -- cgit 1.4.1 From c9189eb28272bcf55599330b36521afd1f20d47f Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sun, 11 Oct 2015 00:44:19 +0200 Subject: jitload: added a CallbackHandler.has_callbacks method Calling this method before the callback loop in runiter_once allows a non negligeable speedup (around 20% for tested samples). --- miasm2/jitter/jitload.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'miasm2') diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index 1c88d0b7..c5ea83f7 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -113,6 +113,9 @@ class CallbackHandler(object): return empty_keys + def has_callbacks(self, name): + return name in self.callbacks + def call_callbacks(self, name, *args): """Call callbacks associated to key 'name' with arguments args. While callbacks return True, continue with next callback. @@ -134,13 +137,19 @@ class CallbackHandlerBitflag(CallbackHandler): "Handle a list of callback with conditions on bitflag" + # Overrides CallbackHandler's implem, but do not serve for optimization + def has_callbacks(self, bitflag): + for b in self.callbacks.iterkeys(): + if b & bitflag != 0: + return True + def __call__(self, bitflag, *args): """Call each callbacks associated with bit set in bitflag. While callbacks return True, continue with next callback. Iterator on other results""" res = True - for b in self.callbacks.keys(): + for b in self.callbacks.iterkeys(): if b & bitflag != 0: # If the flag matched @@ -301,9 +310,10 @@ class jitter: # Check breakpoints old_pc = self.pc - for res in self.breakpoints_handler(self.pc, self): - if res is not True: - yield res + if self.breakpoints_handler.has_callbacks(self.pc): + for res in self.breakpoints_handler(self.pc, self): + if res is not True: + yield res # If a callback changed pc, re call every callback if old_pc != self.pc: -- cgit 1.4.1 From cb5b9fc9d55b396ddfd6b4930197cb6d9398117c Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Thu, 15 Oct 2015 10:09:09 +0200 Subject: BoundedDict: better dict syntax --- miasm2/core/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'miasm2') diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py index b34ec971..75eb3113 100644 --- a/miasm2/core/utils.py +++ b/miasm2/core/utils.py @@ -75,7 +75,7 @@ class BoundedDict(UserDict.DictMixin): self._max_size = max_size self._size = len(self._data) # Do not use collections.Counter as it is quite slow - self._counter = dict((k, 1) for k in self._data.iterkeys()) + self._counter = {k: 1 for k in self._data} self._delete_cb = delete_cb def __setitem__(self, asked_key, value): @@ -99,7 +99,7 @@ class BoundedDict(UserDict.DictMixin): self._size = self._min_size # Reset use's counter - self._counter = dict((k, 1) for k in self._data.iterkeys()) + self._counter = {k: 1 for k in self._data} # Avoid rechecking in dict: set to 1 here, add 1 otherwise self._counter[asked_key] = 1 -- cgit 1.4.1 From 24d3e96d8805fc2b8fa19788c018bbe2c71529c1 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Thu, 15 Oct 2015 10:09:36 +0200 Subject: Jitload: more concise syntax in has_callback --- miasm2/jitter/jitload.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'miasm2') diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index c5ea83f7..112920a1 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -139,9 +139,7 @@ class CallbackHandlerBitflag(CallbackHandler): # Overrides CallbackHandler's implem, but do not serve for optimization def has_callbacks(self, bitflag): - for b in self.callbacks.iterkeys(): - if b & bitflag != 0: - return True + return any(cb_mask & bitflag != 0 for cb_mask in self.callbacks) def __call__(self, bitflag, *args): """Call each callbacks associated with bit set in bitflag. While @@ -149,7 +147,7 @@ class CallbackHandlerBitflag(CallbackHandler): Iterator on other results""" res = True - for b in self.callbacks.iterkeys(): + for b in self.callbacks: if b & bitflag != 0: # If the flag matched -- cgit 1.4.1