diff options
Diffstat (limited to 'focaccia.py')
| -rwxr-xr-x | focaccia.py | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/focaccia.py b/focaccia.py new file mode 100755 index 0000000..e140337 --- /dev/null +++ b/focaccia.py @@ -0,0 +1,187 @@ +#! /bin/python3 + +import argparse +import platform +from typing import Iterable + +from focaccia.arch import x86 +from focaccia.compare import compare_simple, compare_symbolic, \ + ErrorSeverity, ErrorTypes +from focaccia.lldb_target import LLDBConcreteTarget +from focaccia.parser import parse_arancini +from focaccia.snapshot import ProgramState +from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace +from focaccia.utils import print_separator + +def run_native_execution(oracle_program: str, breakpoints: Iterable[int]): + """Gather snapshots from a native execution via an external debugger. + + :param oracle_program: Program to execute. + :param breakpoints: List of addresses at which to break and record the + program's state. + + :return: A list of snapshots gathered from the execution. + """ + assert(platform.machine() == "x86_64") + + target = LLDBConcreteTarget(oracle_program) + + # Set breakpoints + for address in breakpoints: + target.set_breakpoint(address) + + # Execute the native program + snapshots = [] + while not target.is_exited(): + snapshots.append(target.record_snapshot()) + target.run() + + return snapshots + +def match_traces(test: list[ProgramState], truth: list[SymbolicTransform]): + if not test or not truth: + return [], [] + + assert(test[0].read('pc') == truth[0].addr) + + def index(seq, target, access=lambda el: el): + for i, el in enumerate(seq): + if access(el) == target: + return i + return None + + i = 0 + for next_state in test[1:]: + next_pc = next_state.read('pc') + index_in_truth = index(truth[i:], next_pc, lambda el: el.range[1]) + + # If no next element (i.e. no foldable range) is found in the truth + # trace, assume that the test trace contains excess states. Remove one + # and try again. This might skip testing some states, but covers more + # of the entire trace. + if index_in_truth is None: + test.pop(i + 1) + continue + + # Fold the range of truth states until the next test state + for _ in range(index_in_truth): + truth[i].concat(truth.pop(i + 1)) + + assert(truth[i].range[1] == truth[i + 1].addr) + + i += 1 + if len(truth) <= i: + break + + return test, truth + +def parse_inputs(txl_path, program): + # Our architecture + arch = x86.ArchX86() + + with open(txl_path, "r") as txl_file: + txl = parse_arancini(txl_file, arch) + + with open(txl_path, "r") as txl_file: + breakpoints = [state.read('PC') for state in txl] + ref = run_native_execution(program, breakpoints) + + return txl, ref + +def parse_arguments(): + parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference') + parser.add_argument('-p', '--program', + type=str, + required=True, + help='Path to oracle program') + parser.add_argument('-a', '--program-arg', + type=str, + required=False, + default=[], + action='append', + help='Arguments to the program specified with --program.') + parser.add_argument('-t', '--txl', + type=str, + required=True, + help='Path to the translation log (gathered via Arancini)') + parser.add_argument('--symbolic', + action='store_true', + default=False, + help='Use an advanced algorithm that uses symbolic' + ' execution to determine accurate data' + ' transformations. This improves the quality of' + ' generated errors significantly, but may take' + ' more time to run.') + parser.add_argument('--error-level', + type=str, + default='verbose', + choices=['verbose', 'errors', 'restricted'], + help='Verbosity of reported errors. \'errors\' reports' + ' everything that might be an error in the' + ' translation, while \'verbose\' may report' + ' additional errors from incomplete input' + ' data, etc. [Default: verbose]') + args = parser.parse_args() + return args + +def print_result(result, min_severity: ErrorSeverity): + shown = 0 + suppressed = 0 + + for res in result: + pc = res['pc'] + print_separator() + print(f'For PC={hex(pc)}') + print_separator() + + # Filter errors by severity + errs = [e for e in res['errors'] if e.severity >= min_severity] + suppressed += len(res['errors']) - len(errs) + shown += len(errs) + + # Print all non-suppressed errors + for n, err in enumerate(errs, start=1): + print(f' {n:2}. {err}') + + if errs: + print() + print(f'Expected transformation: {res["ref"]}') + print(f'Actual transformation: {res["txl"]}') + else: + print('No errors found.') + + print() + print('#' * 60) + print(f'Found {shown} errors.') + print(f'Suppressed {suppressed} low-priority errors' + f' (showing {min_severity} and higher).') + print('#' * 60) + print() + +def main(): + verbosity = { + 'verbose': ErrorTypes.INFO, + 'errors': ErrorTypes.POSSIBLE, + 'restricted': ErrorTypes.CONFIRMED, + } + args = parse_arguments() + + txl_path = args.txl + program = args.program + prog_args = args.program_arg + txl, ref = parse_inputs(txl_path, program) + + if args.symbolic: + assert(program is not None) + + print(f'Tracing {program} symbolically with arguments {prog_args}...') + transforms = collect_symbolic_trace(program, prog_args) + txl, transforms = match_traces(txl, transforms) + result = compare_symbolic(txl, transforms) + else: + result = compare_simple(txl, ref) + + print_result(result, verbosity[args.error_level]) + +if __name__ == '__main__': + main() |