main.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

#! /bin/python3

import argparse
import platform
from typing import Iterable

import arancini
from arch import x86
from compare import compare_simple, compare_symbolic
from lldb_target import LLDBConcreteTarget, record_snapshot
from symbolic import collect_symbolic_trace
from utils import check_version, print_separator

def run_native_execution(oracle_program: str, breakpoints: Iterable[int]):
    """Execute a program natively and record its state at every stop.

    The program is driven through an `LLDBConcreteTarget`. A snapshot is
    recorded before each call to the target's `run()`, for as long as the
    target does not report that it has exited.

    :param oracle_program: Program to execute.
    :param breakpoints: Addresses at which a breakpoint is set on the target
                        before it is run.

    :return: A list of the recorded snapshots, in recording order.
    """
    assert(platform.machine() == "x86_64")

    debuggee = LLDBConcreteTarget(oracle_program)

    # Install every requested breakpoint before resuming the target
    for addr in breakpoints:
        debuggee.set_breakpoint(addr)

    # Alternate between recording the current state and resuming execution
    recorded = []
    while True:
        if debuggee.is_exited():
            break
        recorded.append(record_snapshot(debuggee))
        debuggee.run()

    return recorded

def parse_inputs(txl_path, ref_path, program):
    """Load the translation trace and obtain the reference trace.

    The reference trace comes from one of two sources. If `program` is given,
    the program is executed natively with breakpoints at the addresses parsed
    from the translation log. Otherwise the reference trace is parsed from the
    log at `ref_path`.

    :param txl_path: Path to the translation log.
    :param ref_path: Path to the reference log. Only read if `program` is
                     None.
    :param program: Path to the oracle program, or None to use the reference
                    log instead.

    :return: A tuple `(txl, ref)` of the parsed translation trace and the
             reference trace.
    :raise ValueError: If both `program` and `ref_path` are None.
    """
    # Validate before touching any file. An `assert` would be stripped when
    # Python runs with -O, and failing late wastes the parse of the
    # translation log.
    if program is None and ref_path is None:
        raise ValueError('Either program or path to native file must be'
                         ' provided')

    # Our architecture
    arch = x86.ArchX86()

    # Read the translation log once; both the trace parser and the
    # breakpoint extraction work on the same lines.
    with open(txl_path, "r") as txl_file:
        txl_lines = txl_file.readlines()
    txl = arancini.parse(txl_lines, arch)

    if program is not None:
        breakpoints = arancini.parse_break_addresses(txl_lines)
        ref = run_native_execution(program, breakpoints)
    else:
        with open(ref_path, "r") as native_file:
            ref = arancini.parse(native_file.readlines(), arch)

    return txl, ref

def parse_arguments(argv=None):
    """Parse the comparator's command line options.

    :param argv: List of argument strings to parse. If None, argparse falls
                 back to the process's command line (`sys.argv[1:]`).

    :return: The parsed namespace with the attributes `program`, `ref`,
             `txl`, `stats`, `verbose`, and `symbolic`.
    """
    parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference')
    parser.add_argument('-p', '--program',
                        type=str,
                        help='Path to oracle program')
    parser.add_argument('-r', '--ref',
                        type=str,
                        required=True,
                        help='Path to the reference log (gathered with run.sh)')
    parser.add_argument('-t', '--txl',
                        type=str,
                        required=True,
                        help='Path to the translation log (gathered via Arancini)')
    parser.add_argument('-s', '--stats',
                        action='store_true',
                        default=False,
                        help='Run statistics on comparisons')
    # NOTE(review): with `store_true` and `default=True` this flag is always
    # True and cannot be switched off. The default is kept so that existing
    # output does not change; confirm whether it should become False.
    parser.add_argument('-v', '--verbose',
                        action='store_true',
                        default=True,
                        help='Enable verbose program output')
    parser.add_argument('--symbolic',
                        action='store_true',
                        default=False,
                        help='Use an advanced algorithm that uses symbolic'
                             ' execution to determine accurate data'
                             ' transformations')
    args = parser.parse_args(argv)
    return args

def _print_results(result):
    """Print every reported register mismatch, followed by the error total.

    :param result: Iterable of per-PC result dicts with the keys 'pc', 'txl',
                   'ref', and 'errors'. Each error is a dict with the keys
                   'reg', 'expected', and 'actual'.
    """
    for res in result:
        pc = res['pc']
        print_separator()
        print(f'For PC={hex(pc)}')
        print_separator()

        # Distinct names so the per-PC states do not shadow the full traces
        txl_state = res['txl']
        ref_state = res['ref']
        for err in res['errors']:
            reg = err['reg']
            print(f'Content of register {reg} is possibly false.'
                  f' Expected difference: {err["expected"]}, actual difference'
                  f' in the translation: {err["actual"]}.\n'
                  f'    (txl) {reg}: {hex(txl_state.read(reg))}\n'
                  f'    (ref) {reg}: {hex(ref_state.read(reg))}')

    print()
    print('#' * 60)
    print(f'Found {sum(len(res["errors"]) for res in result)} errors.')
    print('#' * 60)
    print()

def main():
    """Compare a translation trace against a reference and print mismatches.

    Reads the command line, loads the translation and reference traces, and
    writes the reference trace to the reference path when it was generated
    from an oracle program. In symbolic mode it prints the concatenation of
    the first five collected transforms and exits with status 0. Otherwise it
    runs the simple comparison and prints the result.

    :raise ValueError: If neither an oracle program nor a reference log is
                       given, or if symbolic mode is requested without an
                       oracle program.
    :raise SystemExit: With status 0 at the end of the symbolic mode.
    """
    args = parse_arguments()

    txl_path = args.txl
    reference_path = args.ref
    program = args.program

    stats = args.stats
    verbose = args.verbose

    if verbose:
        print("Enabling verbose program output")
        print(f"Verbose: {verbose}")
        print(f"Statistics: {stats}")
        print(f"Symbolic: {args.symbolic}")

    if program is None and reference_path is None:
        raise ValueError('Either program or path to native file must be'
                         ' provided')

    txl, ref = parse_inputs(txl_path, reference_path, program)

    # The reference was produced by running the oracle program; store it at
    # the reference path.
    if program is not None and reference_path is not None:
        with open(reference_path, 'w') as w:
            for snapshot in ref:
                print(snapshot, file=w)

    if args.symbolic:
        # Explicit check instead of `assert`, which is stripped under -O
        if program is None:
            raise ValueError('The symbolic mode requires an oracle program')

        transforms = collect_symbolic_trace(program, [program])

        # Work in progress: only the first five transforms are combined, so
        # this requires the trace to contain at least five of them.
        new = transforms[0] \
            .concat(transforms[1]) \
            .concat(transforms[2]) \
            .concat(transforms[3]) \
            .concat(transforms[4])
        print(f'New transform: {new}')

        # TODO: Transform the traces so that the states match, then compare
        #       them with compare_symbolic(txl, transforms). The symbolic
        #       comparison algorithm is not supported yet, so stop here.
        raise SystemExit(0)

    result = compare_simple(txl, ref)

    # Print results
    _print_results(result)

# Script entry point: runs only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    # NOTE(review): presumably rejects interpreters older than Python 3.7;
    # confirm against utils.check_version.
    check_version('3.7')
    main()