about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorserpilliere <serpilliere@users.noreply.github.com>2020-10-05 16:33:43 +0200
committerGitHub <noreply@github.com>2020-10-05 16:33:43 +0200
commitede3a8402c2f0a5c4ea90d9f05bd8e417f778d4a (patch)
treec3226dc8f872877bf232af8957a003ff0abe7fac
parent49aace1a90a8079395e8283bb32f856d7af5d73c (diff)
parent2866b5c40235d4d44a6efb125d8ec75f236b5490 (diff)
downloadmiasm-ede3a8402c2f0a5c4ea90d9f05bd8e417f778d4a.tar.gz
miasm-ede3a8402c2f0a5c4ea90d9f05bd8e417f778d4a.zip
Merge pull request #1294 from serpilliere/location_db_doc
Add LocationDB documentation
Diffstat (limited to '')
-rw-r--r--doc/locationdb/locationdb.ipynb554
1 files changed, 554 insertions, 0 deletions
diff --git a/doc/locationdb/locationdb.ipynb b/doc/locationdb/locationdb.ipynb
new file mode 100644
index 00000000..e09d88e9
--- /dev/null
+++ b/doc/locationdb/locationdb.ipynb
@@ -0,0 +1,554 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# LocationDB object \n",
+    "The `LocationDB` is the Miasm object responsible of the symbols' management. A `Location` is an object representing a code or data (or anywhere else) position. As the name explicits it, the `LocationDB` is a database of locations. Here are some rules:\n",
+    "- each location has exactly *one* associated `LocKey`\n",
+    "- a `LocKey` is linked to a unique `LocationDB` (and must not be used in another `LocationDB`)\n",
+    "- a `LocKey` is very similar to primary key object in a database.\n",
+    "- a `LocKey` can have an optional *offset*.\n",
+    "- a `LocKey` can have multiple symbol names\n",
+    "- two `Lockey`s cannot share an identic offset\n",
+    "- two `LocKey`s cannot share a symol name\n",
+    "\n",
+    "Below are manipulations of the `LocationDB`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import and create a locationDB\n",
+    "from miasm.core.locationdb import LocationDB\n",
+    "loc_db = LocationDB()\n",
+    "print(repr(loc_db))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a location with default attributes (no offset, no symbol name)\n",
+    "loc_a = loc_db.add_location()\n",
+    "print(loc_a)\n",
+    "# Create a second location with an offset\n",
+    "loc_b = loc_db.add_location(offset=112233)\n",
+    "print(loc_b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add a location with a name\n",
+    "loc_c = loc_db.add_location(name=\"main\")\n",
+    "# Add another alias name to this location\n",
+    "loc_db.add_location_name(loc_c, \"_main_\")\n",
+    "# Add another location\n",
+    "loc_d = loc_db.add_location()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Display LocationDB\n",
+    "print(loc_db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Associate an offset to an existing location\n",
+    "loc_db.set_location_offset(loc_a, 0x5678)\n",
+    "print(loc_db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Remove a name from an existing location\n",
+    "loc_db.remove_location_name(loc_c, \"_main_\")\n",
+    "print(loc_db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the offset of a location\n",
+    "hex(loc_db.get_location_offset(loc_a))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Location with no offset\n",
+    "print(loc_db.get_location_offset(loc_c))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'loc_5678'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Display locations\n",
+    "loc_db.pretty_str(loc_a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'loc_1b669'"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "loc_db.pretty_str(loc_b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'main'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "loc_db.pretty_str(loc_c)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'loc_key_3'"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "loc_db.pretty_str(loc_d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that:\n",
+    "- if a location doesn't have an offset neither a name, its *pretty* form is `loc_key_XXX`\n",
+    "- if a location has an offset but no name, its *pretty* form is `loc_OFFSET`\n",
+    "- if a location has a name, its *pretty* form is `name`\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Locations usage in Miasm\n",
+    "\n",
+    "The locations are used everywhere in miasm. For example, let disassemble a simple shellcode:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loc_0\n",
+      "CMP        EAX, 0x10\n",
+      "JZ         loc_c\n",
+      "->\tc_next:loc_5 \tc_to:loc_c \n",
+      "loc_c\n",
+      "MOV        EAX, ECX\n",
+      "CALL       loc_11223344\n",
+      "->\tc_next:loc_13 \n",
+      "loc_5\n",
+      "MOV        ESI, EAX\n",
+      "CMOVA      EAX, EBX\n",
+      "JMP        loc_14\n",
+      "->\tc_to:loc_14 \n",
+      "loc_13\n",
+      "INC        EAX\n",
+      "->\tc_next:loc_14 \n",
+      "loc_14\n",
+      "RET        \n"
+     ]
+    }
+   ],
+   "source": [
+    "from miasm.analysis.binary import Container\n",
+    "from miasm.analysis.machine import Machine\n",
+    "\n",
+    "# Create a LocationDB\n",
+    "loc_db = LocationDB()\n",
+    "\n",
+    "# Create a container of bytes\n",
+    "cont = Container.from_string(\n",
+    "    b\"\\x83\\xf8\\x10\\x74\\x07\\x89\\xc6\\x0f\\x47\\xc3\\xeb\\x08\\x89\\xc8\\xe8\\x31\\x33\\x22\\x11\\x40\\xc3\",\n",
+    "    loc_db\n",
+    ")\n",
+    "\n",
+    "# Instantiate a x86 32 bit architecture\n",
+    "machine = Machine(\"x86_32\")\n",
+    "\n",
+    "# Instantiate a disassembler engine, using the previous bin_stream and its\n",
+    "# associated location DB.\n",
+    "mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)\n",
+    "\n",
+    "# Run a recursive traversal disassembling from address 0\n",
+    "asmcfg = mdis.dis_multiblock(0)\n",
+    "\n",
+    "# Display each basic blocks\n",
+    "for block in asmcfg.blocks:\n",
+    "    print(block)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loc_0\n",
+      "CMP        EAX, 0x10\n",
+      "JZ         loc_c\n",
+      "->\tc_next:loc_5 \tc_to:loc_c \n"
+     ]
+    }
+   ],
+   "source": [
+    "block = asmcfg.getby_offset(0)\n",
+    "print(block)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loc_key_0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# The basic block is placed at a location, which can be retrieved using `.loc_key`\n",
+    "print(block.loc_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loc_key_0: 0x0 - entry\n",
+      "loc_key_1: 0xc - \n",
+      "loc_key_2: 0x5 - \n",
+      "loc_key_3: 0x11223344 - \n",
+      "loc_key_4: 0x13 - \n",
+      "loc_key_5: 0x14 - \n"
+     ]
+    }
+   ],
+   "source": [
+    "# We can add a name to this first location\n",
+    "loc_db.add_location_name(block.loc_key, \"entry\")\n",
+    "print(loc_db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "entry\n",
+      "CMP        EAX, 0x10\n",
+      "JZ         loc_c\n",
+      "->\tc_next:loc_5 \tc_to:loc_c \n"
+     ]
+    }
+   ],
+   "source": [
+    "# And we can re-display the block:\n",
+    "print(block)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "entry\n",
+      "CMP        EAX, 0x10\n",
+      "JZ         quiche\n",
+      "->\tc_next:loc_5 \tc_to:quiche \n"
+     ]
+    }
+   ],
+   "source": [
+    "# We will give an arbitrary name to location at offset 0xC\n",
+    "loc_c = loc_db.get_offset_location(0xc)\n",
+    "loc_db.add_location_name(loc_c, \"quiche\")\n",
+    "print(block)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Those locations are also used in intermediate representation:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get a lifter\n",
+    "lifter = machine.ira(loc_db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the intermediate representation of the asmcfg\n",
+    "ircfg = lifter.new_ircfg_from_asmcfg(asmcfg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "b'entry':\n",
+      "\n",
+      "zf = FLAG_EQ_CMP(EAX, 0x10)\n",
+      "\n",
+      "nf = FLAG_SIGN_SUB(EAX, 0x10)\n",
+      "\n",
+      "pf = parity((EAX + -0x10) & 0xFF)\n",
+      "\n",
+      "cf = FLAG_SUB_CF(EAX, 0x10)\n",
+      "\n",
+      "of = FLAG_SUB_OF(EAX, 0x10)\n",
+      "\n",
+      "af = ((EAX ^ 0x10) ^ (EAX + -0x10))[4:5]\n",
+      "\n",
+      "EIP = CC_EQ(zf)?(b'quiche',loc_5)\n",
+      "\n",
+      "IRDst = CC_EQ(zf)?(b'quiche',loc_5)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get location at 0\n",
+    "loc_entry = loc_db.get_offset_location(0)\n",
+    "# Get irblock at this location\n",
+    "irblock = ircfg.blocks[loc_entry]\n",
+    "# Display IRBlock\n",
+    "print(irblock)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In miasm, each expression embeds its size. The location doesn't have a size. To use a location in IR code, you have to wrap it in the Miasm word `ExprLoc`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CC_EQ(zf)?(loc_key_1,loc_key_2)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get the irblock destination (IRDst value)\n",
+    "dst = irblock.dst\n",
+    "print(dst)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ExprCond(ExprOp('CC_EQ', ExprId('zf', 1)), ExprLoc(<LocKey 1>, 32), ExprLoc(<LocKey 2>, 32))\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(repr(dst))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ExprLoc(<LocKey 1>, 32) ExprLoc(<LocKey 2>, 32)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# It's an ExprCond. We retrieve here the possible values\n",
+    "src1, src2 = dst.src1, dst.src2\n",
+    "print(repr(src1), repr(src2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loc_key_1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Retrieve the location of the ExprLoc\n",
+    "loc = src1.loc_key\n",
+    "print(loc)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}