about summary refs log tree commit diff stats
path: root/arancini.py
blob: adbe6c878e2bdda85d69afb57078a3d02193a7b9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""Tools for working with arancini's output."""

import re
from functools import partial as bind

from snapshot import ProgramState
from arch.arch import Arch

def parse_break_addresses(lines: list[str]) -> set[int]:
    """Collect every breakpoint address mentioned in an arancini log.

    Only lines that start with 'INVOKE' are considered. For each of them
    the text between the first and second '=' is stripped of surrounding
    whitespace and read as a hexadecimal integer.

    :param lines: The lines of the log.
    :return: The set of distinct addresses found.
    """
    return {
        int(entry.split('=')[1].strip(), base=16)
        for entry in lines
        if entry.startswith('INVOKE')
    }

def parse(lines: list[str], arch: Arch) -> list[ProgramState]:
    """Parse an arancini log into a list of snapshots.

    A line whose label maps to the 'PC' register starts a new snapshot.
    Every other recognised register or flag line is stored in the most
    recently started snapshot. A line containing 'Backwards' calls
    set_backwards() on the most recent snapshot.

    Register and flag lines that occur before the first snapshot has been
    started have no snapshot to belong to and are skipped.

    :param lines: The lines of the log.
    :param arch: Passed to the constructor of every ProgramState created.
    :return: A list of program snapshots.
    """

    labels = get_labels()

    # The regex decides for a line whether it contains a register
    # based on a match with that register's label. Pattern.match only
    # matches at the beginning of the line.
    regex = re.compile("|".join(labels))

    def try_parse_line(line: str) -> tuple[str, int] | None:
        """Try to parse a register name and that register's value from a line.

        :return: A register name and a register value if the line contains
                 that information. None if the line starts with no known
                 label.
        """
        found = regex.match(line)
        if found is None:
            return None
        register, get_reg_value = labels[found.group(0)]
        return register, get_reg_value(line)

    # Parse a list of program snapshots
    snapshots = []
    for line in lines:
        if 'Backwards' in line and snapshots:
            snapshots[-1].set_backwards()
            continue

        parsed = try_parse_line(line)
        if parsed is None:
            continue

        reg, value = parsed
        if reg == 'PC':
            snapshots.append(ProgramState(arch))
        elif not snapshots:
            # A register value seen before any PC line: indexing
            # snapshots[-1] would raise IndexError, so skip the line.
            continue
        snapshots[-1].set(reg, value)

    return snapshots

def get_labels():
    """Build the lookup table that drives the arancini log parser.

    :return: A dict that maps each line label to a pair made of the
             register name and a function which, given the whole line,
             returns that register's value parsed as a hexadecimal integer.
    """
    def hex_token(x, i):
        # The i-th whitespace-separated token of the line, read as base 16.
        return int(x.split()[i], 16)

    def hex_equal_field(x, i):
        # The i-th '='-separated field of the line, read as base 16.
        return int(x.split('=')[i], 16)

    # Register lines carry the value in token 1; flag labels consist of two
    # words themselves, so their value is token 2.
    register_value = bind(hex_token, i=1)
    flag_value = bind(hex_token, i=2)

    register_labels = ('RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP',
                       'R8', 'R9', 'R10', 'R11', 'R12', 'R13', 'R14', 'R15')
    flag_labels = ('flag ZF', 'flag CF', 'flag OF',
                   'flag SF', 'flag PF', 'flag DF')

    # Insertion order is kept the same as the label order above: program
    # counter first, then the registers, then the flags.
    table = {'INVOKE': ('PC', bind(hex_equal_field, i=1))}
    for name in register_labels:
        table[name] = (name, register_value)
    for name in flag_labels:
        table[name] = (name, flag_value)
    return table