about summary refs log tree commit diff stats
path: root/src/miasm/core/locationdb.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/miasm/core/locationdb.py')
-rw-r--r--src/miasm/core/locationdb.py495
1 files changed, 495 insertions, 0 deletions
diff --git a/src/miasm/core/locationdb.py b/src/miasm/core/locationdb.py
new file mode 100644
index 00000000..b7e16ea2
--- /dev/null
+++ b/src/miasm/core/locationdb.py
@@ -0,0 +1,495 @@
+import warnings
+from builtins import int as int_types
+
+from functools import reduce
+from future.utils import viewitems, viewvalues
+
+from miasm.core.utils import printable
+from miasm.expression.expression import LocKey, ExprLoc
+
+
+class LocationDB(object):
+    """
+    LocationDB is a "database" of information associated to location.
+
+    An entry in a LocationDB is uniquely identified with a LocKey.
+    Additional information which can be associated with a LocKey are:
+    - an offset (uniq per LocationDB)
+    - several names (each are uniqs per LocationDB)
+
+    As a schema:
+    loc_key  1 <-> 0..1  offset
+             1 <-> 0..n  name
+
+    >>> loc_db = LocationDB()
+    # Add a location with no additional information
+    >>> loc_key1 = loc_db.add_location()
+    # Add a location with an offset
+    >>> loc_key2 = loc_db.add_location(offset=0x1234)
+    # Add a location with several names
+    >>> loc_key3 = loc_db.add_location(name="first_name")
+    >>> loc_db.add_location_name(loc_key3, "second_name")
+    # Associate an offset to an existing location
+    >>> loc_db.set_location_offset(loc_key3, 0x5678)
+    # Remove a name from an existing location
+    >>> loc_db.remove_location_name(loc_key3, "second_name")
+
+    # Get back offset
+    >>> loc_db.get_location_offset(loc_key1)
+    None
+    >>> loc_db.get_location_offset(loc_key2)
+    0x1234
+    >>> loc_db.get_location_offset("first_name")
+    0x5678
+
+    # Display a location
+    >>> loc_db.pretty_str(loc_key1)
+    loc_key_1
+    >>> loc_db.pretty_str(loc_key2)
+    loc_1234
+    >>> loc_db.pretty_str(loc_key3)
+    first_name
+    """
+
+    def __init__(self):
+        # Known LocKeys
+        self._loc_keys = set()
+
+        # Association tables
+        self._loc_key_to_offset = {}
+        self._loc_key_to_names = {}
+        self._name_to_loc_key = {}
+        self._offset_to_loc_key = {}
+
+        # Counter for new LocKey generation
+        self._loc_key_num = 0
+
+    def get_location_offset(self, loc_key):
+        """
+        Return the offset of @loc_key if any, None otherwise.
+        @loc_key: LocKey instance
+        """
+        assert isinstance(loc_key, LocKey)
+        return self._loc_key_to_offset.get(loc_key)
+
+    def get_location_names(self, loc_key):
+        """
+        Return the frozenset of names associated to @loc_key
+        @loc_key: LocKey instance
+        """
+        assert isinstance(loc_key, LocKey)
+        return frozenset(self._loc_key_to_names.get(loc_key, set()))
+
+    def get_name_location(self, name):
+        """
+        Return the LocKey of @name if any, None otherwise.
+        @name: target name
+        """
+        assert isinstance(name, str)
+        return self._name_to_loc_key.get(name)
+
+    def get_or_create_name_location(self, name):
+        """
+        Return the LocKey of @name if any, create one otherwise.
+        @name: target name
+        """
+        assert isinstance(name, str)
+        loc_key = self._name_to_loc_key.get(name)
+        if loc_key is not None:
+            return loc_key
+        return self.add_location(name=name)
+
+    def get_offset_location(self, offset):
+        """
+        Return the LocKey of @offset if any, None otherwise.
+        @offset: target offset
+        """
+        return self._offset_to_loc_key.get(offset)
+
+    def get_or_create_offset_location(self, offset):
+        """
+        Return the LocKey of @offset if any, create one otherwise.
+        @offset: target offset
+        """
+        loc_key = self._offset_to_loc_key.get(offset)
+        if loc_key is not None:
+            return loc_key
+        return self.add_location(offset=offset)
+
+    def get_name_offset(self, name):
+        """
+        Return the offset of @name if any, None otherwise.
+        @name: target name
+        """
+        assert isinstance(name, str)
+        loc_key = self.get_name_location(name)
+        if loc_key is None:
+            return None
+        return self.get_location_offset(loc_key)
+
+    def add_location_name(self, loc_key, name):
+        """Associate a name @name to a given @loc_key
+        @name: str instance
+        @loc_key: LocKey instance
+        """
+        assert isinstance(name, str)
+        assert loc_key in self._loc_keys
+        already_existing_loc = self._name_to_loc_key.get(name)
+        if already_existing_loc is not None and already_existing_loc != loc_key:
+            raise KeyError("%r is already associated to a different loc_key "
+                           "(%r)" % (name, already_existing_loc))
+        self._loc_key_to_names.setdefault(loc_key, set()).add(name)
+        self._name_to_loc_key[name] = loc_key
+
+    def remove_location_name(self, loc_key, name):
+        """Disassociate a name @name from a given @loc_key
+        Fail if @name is not already associated to @loc_key
+        @name: str instance
+        @loc_key: LocKey instance
+        """
+        assert loc_key in self._loc_keys
+        assert isinstance(name, str)
+        already_existing_loc = self._name_to_loc_key.get(name)
+        if already_existing_loc is None:
+            raise KeyError("%r is not already associated" % name)
+        if already_existing_loc != loc_key:
+            raise KeyError("%r is already associated to a different loc_key "
+                           "(%r)" % (name, already_existing_loc))
+        del self._name_to_loc_key[name]
+        self._loc_key_to_names[loc_key].remove(name)
+
+    def set_location_offset(self, loc_key, offset, force=False):
+        """Associate the offset @offset to an LocKey @loc_key
+
+        If @force is set, override silently. Otherwise, if an offset is already
+        associated to @loc_key, an error will be raised
+        """
+        assert loc_key in self._loc_keys
+        already_existing_loc = self.get_offset_location(offset)
+        if already_existing_loc is not None and already_existing_loc != loc_key:
+            raise KeyError("%r is already associated to a different loc_key "
+                           "(%r)" % (offset, already_existing_loc))
+        already_existing_off = self._loc_key_to_offset.get(loc_key)
+        if (already_existing_off is not None and
+            already_existing_off != offset):
+            if not force:
+                raise ValueError(
+                    "%r already has an offset (0x%x). Use 'force=True'"
+                    " for silent overriding" % (
+                        loc_key, already_existing_off
+                    ))
+            else:
+                self.unset_location_offset(loc_key)
+        self._offset_to_loc_key[offset] = loc_key
+        self._loc_key_to_offset[loc_key] = offset
+
+    def unset_location_offset(self, loc_key):
+        """Disassociate LocKey @loc_key's offset
+
+        Fail if there is already no offset associate with it
+        @loc_key: LocKey
+        """
+        assert loc_key in self._loc_keys
+        already_existing_off = self._loc_key_to_offset.get(loc_key)
+        if already_existing_off is None:
+            raise ValueError("%r already has no offset" % (loc_key))
+        del self._offset_to_loc_key[already_existing_off]
+        del self._loc_key_to_offset[loc_key]
+
+    def consistency_check(self):
+        """Ensure internal structures are consistent with each others"""
+        assert set(self._loc_key_to_names).issubset(self._loc_keys)
+        assert set(self._loc_key_to_offset).issubset(self._loc_keys)
+        assert self._loc_key_to_offset == {v: k for k, v in viewitems(self._offset_to_loc_key)}
+        assert reduce(
+            lambda x, y:x.union(y),
+            viewvalues(self._loc_key_to_names),
+            set(),
+        ) == set(self._name_to_loc_key)
+        for name, loc_key in viewitems(self._name_to_loc_key):
+            assert name in self._loc_key_to_names[loc_key]
+
+    def find_free_name(self, name):
+        """
+        If @name is not known in DB, return it
+        Else append an index to it corresponding to the next unknown name
+
+        @name: string
+        """
+        assert isinstance(name, str)
+        if self.get_name_location(name) is None:
+            return name
+        i = 0
+        while True:
+            new_name = "%s_%d" % (name, i)
+            if self.get_name_location(new_name) is None:
+                return new_name
+            i += 1
+
+    def add_location(self, name=None, offset=None, strict=True):
+        """Add a new location in the locationDB. Returns the corresponding LocKey.
+        If @name is set, also associate a name to this new location.
+        If @offset is set, also associate an offset to this new location.
+
+        Strict mode (set by @strict, default):
+          If a location with @offset or @name already exists, an error will be
+        raised.
+        Otherwise:
+          If a location with @offset or @name already exists, the corresponding
+        LocKey may be updated and will be returned.
+        """
+
+        # Deprecation handling
+        if isinstance(name, int_types):
+            assert offset is None or offset == name
+            warnings.warn("Deprecated API: use 'add_location(offset=)' instead."
+                          " An additional 'name=' can be provided to also "
+                          "associate a name (there is no more default name)")
+            offset = name
+            name = None
+
+        # Argument cleaning
+        offset_loc_key = None
+        if offset is not None:
+            offset = int(offset)
+            offset_loc_key = self.get_offset_location(offset)
+
+        # Test for collisions
+        name_loc_key = None
+        if name is not None:
+            assert isinstance(name, str)
+            name_loc_key = self.get_name_location(name)
+
+        if strict:
+            if name_loc_key is not None:
+                raise ValueError("An entry for %r already exists (%r), and "
+                                 "strict mode is enabled" % (
+                                     name, name_loc_key
+                                 ))
+            if offset_loc_key is not None:
+                raise ValueError("An entry for 0x%x already exists (%r), and "
+                                 "strict mode is enabled" % (
+                                     offset, offset_loc_key
+                                 ))
+        else:
+            # Non-strict mode
+            if name_loc_key is not None:
+                known_offset = self.get_offset_location(name_loc_key)
+                if known_offset is None:
+                    if offset is not None:
+                        self.set_location_offset(name_loc_key, offset)
+                elif known_offset != offset:
+                    raise ValueError(
+                        "Location with name '%s' already have an offset: 0x%x "
+                        "(!= 0x%x)" % (name, offset, known_offset)
+                        )
+                # Name already known, same offset -> nothing to do
+                return name_loc_key
+
+            elif offset_loc_key is not None:
+                if name is not None:
+                    # Check for already known name are checked above
+                    return self.add_location_name(offset_loc_key, name)
+                # Offset already known, no name specified
+                return offset_loc_key
+
+        # No collision, this is a brand new location
+        loc_key = LocKey(self._loc_key_num)
+        self._loc_key_num += 1
+        self._loc_keys.add(loc_key)
+
+        if offset is not None:
+            assert offset not in self._offset_to_loc_key
+            self._offset_to_loc_key[offset] = loc_key
+            self._loc_key_to_offset[loc_key] = offset
+
+        if name is not None:
+            self._name_to_loc_key[name] = loc_key
+            self._loc_key_to_names[loc_key] = set([name])
+
+        return loc_key
+
+    def remove_location(self, loc_key):
+        """
+        Delete the location corresponding to @loc_key
+        @loc_key: LocKey instance
+        """
+        assert isinstance(loc_key, LocKey)
+        if loc_key not in self._loc_keys:
+            raise KeyError("Unknown loc_key %r" % loc_key)
+        names = self._loc_key_to_names.pop(loc_key, [])
+        for name in names:
+            del self._name_to_loc_key[name]
+        offset = self._loc_key_to_offset.pop(loc_key, None)
+        self._offset_to_loc_key.pop(offset, None)
+        self._loc_keys.remove(loc_key)
+
+    def pretty_str(self, loc_key):
+        """Return a human readable version of @loc_key, according to information
+        available in this LocationDB instance"""
+        names = self.get_location_names(loc_key)
+        new_names = set()
+        for name in names:
+            try:
+                name = name.decode()
+            except AttributeError:
+                pass
+            new_names.add(name)
+        names = new_names
+        if names:
+            return ",".join(names)
+        offset = self.get_location_offset(loc_key)
+        if offset is not None:
+            return "loc_%x" % offset
+        return str(loc_key)
+
+    @property
+    def loc_keys(self):
+        """Return all loc_keys"""
+        return self._loc_keys
+
+    @property
+    def names(self):
+        """Return all known names"""
+        return list(self._name_to_loc_key)
+
+    @property
+    def offsets(self):
+        """Return all known offsets"""
+        return list(self._offset_to_loc_key)
+
+    def __str__(self):
+        out = []
+        for loc_key in self._loc_keys:
+            names = self.get_location_names(loc_key)
+            offset = self.get_location_offset(loc_key)
+            out.append(
+                "%s: %s - %s" % (
+                    loc_key,
+                    "0x%x" % offset if offset is not None else None,
+                    ",".join(printable(name) for name in names)
+                )
+            )
+        return "\n".join(out)
+
+    def merge(self, location_db):
+        """Merge with another LocationDB @location_db
+
+        WARNING: old reference to @location_db information (such as LocKeys)
+        must be retrieved from the updated version of this instance. The
+        dedicated "get_*" APIs may be used for this task
+        """
+        # A simple merge is not doable here, because LocKey will certainly
+        # collides
+
+        for foreign_loc_key in location_db.loc_keys:
+            foreign_names = location_db.get_location_names(foreign_loc_key)
+            foreign_offset = location_db.get_location_offset(foreign_loc_key)
+            if foreign_names:
+                init_name = list(foreign_names)[0]
+            else:
+                init_name = None
+            loc_key = self.add_location(offset=foreign_offset, name=init_name,
+                                        strict=False)
+            cur_names = self.get_location_names(loc_key)
+            for name in foreign_names:
+                if name not in cur_names and name != init_name:
+                    self.add_location_name(loc_key, name=name)
+
+    def canonize_to_exprloc(self, expr):
+        """
+        If expr is ExprInt, return ExprLoc with corresponding loc_key
+        Else, return expr
+
+        @expr: Expr instance
+        """
+        if expr.is_int():
+            loc_key = self.get_or_create_offset_location(int(expr))
+            ret = ExprLoc(loc_key, expr.size)
+            return ret
+        return expr
+
+    # Deprecated APIs
+    @property
+    def items(self):
+        """Return all loc_keys"""
+        warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"')
+        return list(self._loc_keys)
+
+    def __getitem__(self, item):
+        warnings.warn('DEPRECATION WARNING: use "get_name_location" or '
+                      '"get_offset_location"')
+        if item in self._name_to_loc_key:
+            return self._name_to_loc_key[item]
+        if item in self._offset_to_loc_key:
+            return self._offset_to_loc_key[item]
+        raise KeyError('unknown symbol %r' % item)
+
+    def __contains__(self, item):
+        warnings.warn('DEPRECATION WARNING: use "get_name_location" or '
+                      '"get_offset_location", or ".offsets" or ".names"')
+        return item in self._name_to_loc_key or item in self._offset_to_loc_key
+
+    def loc_key_to_name(self, loc_key):
+        """[DEPRECATED API], see 'get_location_names'"""
+        warnings.warn("Deprecated API: use 'get_location_names'")
+        return sorted(self.get_location_names(loc_key))[0]
+
+    def loc_key_to_offset(self, loc_key):
+        """[DEPRECATED API], see 'get_location_offset'"""
+        warnings.warn("Deprecated API: use 'get_location_offset'")
+        return self.get_location_offset(loc_key)
+
+    def remove_loc_key(self, loc_key):
+        """[DEPRECATED API], see 'remove_location'"""
+        warnings.warn("Deprecated API: use 'remove_location'")
+        self.remove_location(loc_key)
+
+    def del_loc_key_offset(self, loc_key):
+        """[DEPRECATED API], see 'unset_location_offset'"""
+        warnings.warn("Deprecated API: use 'unset_location_offset'")
+        self.unset_location_offset(loc_key)
+
+    def getby_offset(self, offset):
+        """[DEPRECATED API], see 'get_offset_location'"""
+        warnings.warn("Deprecated API: use 'get_offset_location'")
+        return self.get_offset_location(offset)
+
+    def getby_name(self, name):
+        """[DEPRECATED API], see 'get_name_location'"""
+        warnings.warn("Deprecated API: use 'get_name_location'")
+        return self.get_name_location(name)
+
+    def getby_offset_create(self, offset):
+        """[DEPRECATED API], see 'get_or_create_offset_location'"""
+        warnings.warn("Deprecated API: use 'get_or_create_offset_location'")
+        return self.get_or_create_offset_location(offset)
+
+    def getby_name_create(self, name):
+        """[DEPRECATED API], see 'get_or_create_name_location'"""
+        warnings.warn("Deprecated API: use 'get_or_create_name_location'")
+        return self.get_or_create_name_location(name)
+
+    def rename_location(self, loc_key, newname):
+        """[DEPRECATED API], see 'add_name_location' and 'remove_location_name'
+        """
+        warnings.warn("Deprecated API: use 'add_location_name' and "
+                      "'remove_location_name'")
+        for name in self.get_location_names(loc_key):
+            self.remove_location_name(loc_key, name)
+        self.add_location_name(loc_key, name)
+
+    def set_offset(self, loc_key, offset):
+        """[DEPRECATED API], see 'set_location_offset'"""
+        warnings.warn("Deprecated API: use 'set_location_offset'")
+        self.set_location_offset(loc_key, offset, force=True)
+
+    def gen_loc_key(self):
+        """[DEPRECATED API], see 'add_location'"""
+        warnings.warn("Deprecated API: use 'add_location'")
+        return self.add_location()
+
+    def str_loc_key(self, loc_key):
+        """[DEPRECATED API], see 'pretty_str'"""
+        warnings.warn("Deprecated API: use 'pretty_str'")
+        return self.pretty_str(loc_key)