diff options
| author | Florent Monjalet <florent.monjalet@gmail.com> | 2015-10-11 00:40:35 +0200 |
|---|---|---|
| committer | Florent Monjalet <florent.monjalet@gmail.com> | 2015-10-11 00:40:35 +0200 |
| commit | 4ac1d728d4418ff50330feb631bf3ea2f16429a8 (patch) | |
| tree | 2135c2add1bd14b7b6475b8b0bcf7d3f4b9d2322 /miasm2/core/utils.py | |
| parent | cbb7a7bb62734b9a5ca2d2a126b060c8c7b72588 (diff) | |
| download | miasm-4ac1d728d4418ff50330feb631bf3ea2f16429a8.tar.gz miasm-4ac1d728d4418ff50330feb631bf3ea2f16429a8.zip | |
BoundedDict: faster implementation
Replaced collections.Counter with a simple {key -> int} built-in dict.
This allows for an up to 2 times speed up when jitting.
Diffstat (limited to 'miasm2/core/utils.py')
| -rw-r--r-- | miasm2/core/utils.py | 33 |
1 files changed, 25 insertions, 8 deletions
diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py index 782c21d6..b34ec971 100644 --- a/miasm2/core/utils.py +++ b/miasm2/core/utils.py @@ -1,6 +1,7 @@ import struct import inspect import UserDict +from operator import itemgetter upck8 = lambda x: struct.unpack('B', x)[0] upck16 = lambda x: struct.unpack('H', x)[0] @@ -73,7 +74,8 @@ class BoundedDict(UserDict.DictMixin): self._min_size = min_size if min_size else max_size / 3 self._max_size = max_size self._size = len(self._data) - self._counter = collections.Counter(self._data.keys()) + # Do not use collections.Counter as it is quite slow + self._counter = dict((k, 1) for k in self._data.iterkeys()) self._delete_cb = delete_cb def __setitem__(self, asked_key, value): @@ -83,31 +85,46 @@ class BoundedDict(UserDict.DictMixin): # Bound can only be reached on a new element if (self._size >= self._max_size): - most_commons = [key for key, _ in self._counter.most_common()] + most_common = sorted(self._counter.iteritems(), + key=itemgetter(1), reverse=True) # Handle callback if self._delete_cb is not None: - for key in most_commons[self._min_size - 1:]: + for key, _ in most_common[self._min_size - 1:]: self._delete_cb(key) # Keep only the most @_min_size used self._data = {key:self._data[key] - for key in most_commons[:self._min_size - 1]} + for key, _ in most_common[:self._min_size - 1]} self._size = self._min_size # Reset use's counter - self._counter = collections.Counter(self._data.keys()) + self._counter = dict((k, 1) for k in self._data.iterkeys()) + + # Avoid rechecking in dict: set to 1 here, add 1 otherwise + self._counter[asked_key] = 1 + else: + self._counter[asked_key] += 1 self._data[asked_key] = value - self._counter.update([asked_key]) + + def __contains__(self, key): + # Do not call has_key to avoid adding function call overhead + return key in self._data + + def has_key(self, key): + return key in self._data def keys(self): "Return the list of dict's keys" return self._data.keys() def __getitem__(self, key): - self._counter.update([key]) - return self._data[key] + # Retrieve data first to raise the proper exception on error + data = self._data[key] + # Should never raise, since the key is in self._data + self._counter[key] += 1 + return data def __delitem__(self, key): if self._delete_cb is not None: |