about summary refs log tree commit diff stats
path: root/arancini.py
diff options
context:
space:
mode:
authorTheofilos Augoustis <theofilos.augoustis@gmail.com>2023-10-11 16:21:21 +0200
committerTheofilos Augoustis <theofilos.augoustis@gmail.com>2023-10-11 16:21:21 +0200
commit69c55d68d68c00007afa1af76a1d06f74ee72fe6 (patch)
tree991b92b4a5ba447b9fb5f77db4377bd9d14fbdf9 /arancini.py
parentb9c08cadc158b18d7cab14a830a9e11f590ec7bd (diff)
downloadfocaccia-69c55d68d68c00007afa1af76a1d06f74ee72fe6.tar.gz
focaccia-69c55d68d68c00007afa1af76a1d06f74ee72fe6.zip
Refactor file structure
- main.py: focaccia user-interface

- snapshot.py: state trace snapshots handling

- compare.py: snapshot comparison algorithms

- run.py: native execution tracer

- arancini.py: Arancini log handling

- arch/: per-architecture abstractions

Co-authored-by: Theofilos Augoustis <theofilos.augoustis@gmail.com>
Co-authored-by: Nicola Crivellin <nicola.crivellin98@gmail.com>
Diffstat (limited to 'arancini.py')
-rw-r--r--arancini.py94
1 files changed, 94 insertions, 0 deletions
diff --git a/arancini.py b/arancini.py
new file mode 100644
index 0000000..adbe6c8
--- /dev/null
+++ b/arancini.py
@@ -0,0 +1,94 @@
+"""Tools for working with arancini's output."""
+
+import re
+from functools import partial as bind
+
+from snapshot import ProgramState
+from arch.arch import Arch
+
+def parse_break_addresses(lines: list[str]) -> set[int]:
+    """Parse all breakpoint addresses from an arancini log."""
+    addresses = set()
+    for l in lines:
+        if l.startswith('INVOKE'):
+            addr = int(l.split('=')[1].strip(), base=16)
+            addresses.add(addr)
+
+    return addresses
+
+def parse(lines: list[str], arch: Arch) -> list[ProgramState]:
+    """Parse an arancini log into a list of snapshots.
+
+    :return: A list of program snapshots.
+    """
+
+    labels = get_labels()
+
+    # The regex decides for a line whether it contains a register
+    # based on a match with that register's label.
+    regex = re.compile("|".join(labels.keys()))
+
+    def try_parse_line(line: str) -> tuple[str, int] | None:
+        """Try to parse a register name and that register's value from a line.
+
+        :return: A register name and a register value if the line contains
+                 that information. None if parsing fails.
+        """
+        match = regex.match(line)
+        if match:
+            label = match.group(0)
+            register, get_reg_value = labels[label]
+            return register, get_reg_value(line)
+        return None
+
+    # Parse a list of program snapshots
+    snapshots = []
+    for line in lines:
+        if 'Backwards' in line and len(snapshots) > 0:
+            snapshots[-1].set_backwards()
+            continue
+
+        match = try_parse_line(line)
+        if match:
+            reg, value = match
+            if reg == 'PC':
+                snapshots.append(ProgramState(arch))
+            snapshots[-1].set(reg, value)
+
+    return snapshots
+
+def get_labels():
+    """Construct a helper structure for the arancini log parser."""
+    split_value = lambda x,i: int(x.split()[i], 16)
+
+    split_first = bind(split_value, i=1)
+    split_second = bind(split_value, i=2)
+
+    split_equal = lambda x,i: int(x.split('=')[i], 16)
+
+    # A mapping from regex patterns to the register name and a
+    # function that extracts that register's value from the line
+    labels = {'INVOKE':  ('PC',      bind(split_equal, i=1)),
+              'RAX':     ('RAX',     split_first),
+              'RBX':     ('RBX',     split_first),
+              'RCX':     ('RCX',     split_first),
+              'RDX':     ('RDX',     split_first),
+              'RSI':     ('RSI',     split_first),
+              'RDI':     ('RDI',     split_first),
+              'RBP':     ('RBP',     split_first),
+              'RSP':     ('RSP',     split_first),
+              'R8':      ('R8',      split_first),
+              'R9':      ('R9',      split_first),
+              'R10':     ('R10',     split_first),
+              'R11':     ('R11',     split_first),
+              'R12':     ('R12',     split_first),
+              'R13':     ('R13',     split_first),
+              'R14':     ('R14',     split_first),
+              'R15':     ('R15',     split_first),
+              'flag ZF': ('flag ZF', split_second),
+              'flag CF': ('flag CF', split_second),
+              'flag OF': ('flag OF', split_second),
+              'flag SF': ('flag SF', split_second),
+              'flag PF': ('flag PF', split_second),
+              'flag DF': ('flag DF', split_second)}
+    return labels