diff options
| -rwxr-xr-x | focaccia.py | 189 | ||||
| -rw-r--r-- | focaccia/miasm_util.py | 7 |
2 files changed, 132 insertions, 64 deletions
diff --git a/focaccia.py b/focaccia.py index 0637690..f0c6efe 100755 --- a/focaccia.py +++ b/focaccia.py @@ -2,20 +2,19 @@ import argparse import platform -from typing import Iterable, Tuple +from typing import Callable, Iterable -from focaccia.arch import supported_architectures +import focaccia.parser as parser +from focaccia.arch import supported_architectures, Arch from focaccia.compare import compare_simple, compare_symbolic, ErrorTypes from focaccia.lldb_target import LLDBConcreteTarget from focaccia.match import fold_traces, match_traces -from focaccia.parser import parse_arancini, parse_snapshots from focaccia.snapshot import ProgramState -from focaccia.symbolic import collect_symbolic_trace -from focaccia.utils import print_result +from focaccia.symbolic import collect_symbolic_trace, SymbolicTransform +from focaccia.utils import print_result, get_envp from focaccia.reproducer import Reproducer from focaccia.compare import ErrorSeverity - - +from focaccia.trace import Trace, TraceEnvironment verbosity = { 'info': ErrorTypes.INFO, @@ -23,17 +22,34 @@ verbosity = { 'error': ErrorTypes.CONFIRMED, } -def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \ - -> Tuple[list[ProgramState], list]: +concrete_trace_parsers = { + 'focaccia': lambda f, _: parser.parse_snapshots(f), + 'qemu': parser.parse_qemu, + 'arancini': parser.parse_arancini, +} + +_MatchingAlgorithm = Callable[ + [list[ProgramState], list[SymbolicTransform]], + tuple[list[ProgramState], list[SymbolicTransform]] +] + +matching_algorithms: dict[str, _MatchingAlgorithm] = { + 'none': lambda c, s: (c, s), + 'simple': match_traces, + 'fold': fold_traces, +} + +def collect_concrete_trace(env: TraceEnvironment, breakpoints: Iterable[int]) \ + -> list[ProgramState]: """Gather snapshots from a native execution via an external debugger. - :param oracle_program: Program to execute. + :param env: Program to execute and the environment in which to execute it. :param breakpoints: List of addresses at which to break and record the program's state. :return: A list of snapshots gathered from the execution. """ - target = LLDBConcreteTarget(oracle_program) + target = LLDBConcreteTarget(env.binary_name, env.argv, env.envp) # Set breakpoints for address in breakpoints: @@ -41,29 +57,56 @@ def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \ # Execute the native program snapshots = [] - basic_blocks = [] while not target.is_exited(): snapshots.append(target.record_snapshot()) - basic_blocks.append(target.get_next_basic_block()) target.run() - return snapshots, basic_blocks + return snapshots def parse_arguments(): - parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference') - parser.add_argument('-p', '--program', - type=str, - required=True, - help='Path to oracle program') - parser.add_argument('-a', '--program-arg', - type=str, + parser = argparse.ArgumentParser() + parser.description = '''An emulator tester and verifier. + +You can pre-record symbolic traces with `tools/capture_transforms.py`, then pass +them to the verifier with the --oracle-trace argument. +''' + + # Specification of the symbolic truth trace + symb_trace = parser.add_mutually_exclusive_group(required=True) + symb_trace.add_argument('-p', '--oracle-program', + help='A program from which a symbolic truth will be' + ' recorded.') + symb_trace.add_argument('-o', '--oracle-trace', '--symb-trace', + help='A symbolic trace that serves as a truth state' + ' for comparison.') + parser.add_argument('-a', '--oracle-args', + nargs='*', default=[], - action='append', - help='Arguments to the program specified with --program.') - parser.add_argument('-t', '--txl', - type=str, + help='Arguments to the oracle program.') + parser.add_argument('-e', '--oracle-env', + nargs='*', + help='Override the oracle program\'s environment during' + ' symbolic and concrete execution.') + + # Specification of the concrete test trace + parser.add_argument('-t', '--test-trace', required=True, - help='Path to the translation log (gathered via Arancini)') + help='The concrete test states to test against the' + ' symbolic truth.') + parser.add_argument('--test-trace-type', + default='focaccia', + choices=list(concrete_trace_parsers.keys()), + help='Log file format of the tested program trace.' + ' [Default: focaccia]') + + # Algorithm and output control + parser.add_argument('--match', + choices=list(matching_algorithms.keys()), + default='simple', + help='Select an algorithm to match the test trace to' + ' the truth trace. Only applicable if --symbolic' + ' is enabled.' + ' [Default: simple]') parser.add_argument('--symbolic', action='store_true', default=False, @@ -73,7 +116,6 @@ def parse_arguments(): ' generated errors significantly, but will take' ' more time to complete.') parser.add_argument('--error-level', - type=str, default='warning', choices=list(verbosity.keys()), help='Verbosity of reported errors. \'error\' only' @@ -83,21 +125,19 @@ def parse_arguments(): ' may as well stem from incomplete input data.' ' \'info\' will report absolutely everything.' ' [Default: warning]') - parser.add_argument('-r', '--reproducer', + parser.add_argument('--no-verifier', action='store_true', default=False, - help='Enable reproducer to get assembly code' - ' which should replicate the first error.') - parser.add_argument('--trace-type', - type=str, - default='qemu', - choices=['qemu', 'arancini'], - help='Trace type of the emulator.' - ' Currently only Qemu and Arancini traces are accepted.' - ' Use \'qemu\' for Qemu and \'arancini\' for Arancini.' - ' [Default: qemu]') - args = parser.parse_args() - return args + help='Don\'t print verifier output.') + + # Reproducer + parser.add_argument('--reproducer', + action='store_true', + default=False, + help='Generate repoducer executables for detected' + ' errors.') + + return parser.parse_args() def print_reproducer(result, min_severity: ErrorSeverity, oracle, oracle_args): for res in result: @@ -108,6 +148,30 @@ def print_reproducer(result, min_severity: ErrorSeverity, oracle, oracle_args): print(rep.asm()) return +def get_test_trace(args, arch: Arch) -> Trace[ProgramState]: + path = args.test_trace + parser = concrete_trace_parsers[args.test_trace_type] + with open(path, 'r') as txl_file: + return parser(txl_file, arch) + +def get_truth_env(args) -> TraceEnvironment: + oracle = args.oracle_program + oracle_args = args.oracle_args + if args.oracle_env: + oracle_env = args.oracle_env + else: + oracle_env = get_envp() + return TraceEnvironment(oracle, oracle_args, oracle_env) + +def get_symbolic_trace(args): + if args.oracle_program: + env = get_truth_env(args) + print('Tracing', env) + return collect_symbolic_trace(env) + elif args.oracle_trace: + with open(args.oracle_trace, 'r') as file: + return parser.parse_transformations(file) + raise AssertionError() def main(): args = parse_arguments() @@ -120,36 +184,39 @@ def main(): exit(1) arch = supported_architectures[platform.machine()] - txl_path = args.txl - oracle = args.program - oracle_args = args.program_arg - # Parse reference trace - with open(txl_path, "r") as txl_file: - if args.trace_type == 'qemu': - test_states = parse_snapshots(txl_file) - elif args.trace_type == 'arancini': - test_states = parse_arancini(txl_file, arch) - else: - test_states = parse_snapshots(txl_file) + test_trace = get_test_trace(args, arch) # Compare reference trace to a truth if args.symbolic: - print(f'Tracing {oracle} symbolically with arguments {oracle_args}...') - transforms = collect_symbolic_trace(oracle, oracle_args) - test_states, transforms = match_traces(test_states, transforms) - #fold_traces(test_states, transforms) - result = compare_symbolic(test_states, transforms) + symb_trace = get_symbolic_trace(args) + match = matching_algorithms[args.match] + conc, symb = match(test_trace.states, symb_trace.states) + + result = compare_symbolic(conc, symb) + oracle_env = symb_trace.env else: + if not args.oracle_program: + print('Argument --oracle-program is required for non-symbolic' + ' verification!') + exit(1) + # Record truth states from a concrete execution of the oracle - breakpoints = [state.read_register('PC') for state in test_states] - truth = collect_concrete_trace(oracle, breakpoints) - result = compare_simple(test_states, truth) + breakpoints = [state.read_register('PC') for state in test_trace] + env = get_truth_env(args) + truth_trace = collect_concrete_trace(env, breakpoints) + + result = compare_simple(test_trace.states, truth_trace) + oracle_env = env - print_result(result, verbosity[args.error_level]) + if not args.no_verifier: + print_result(result, verbosity[args.error_level]) if args.reproducer: - print_reproducer(result, verbosity[args.error_level], oracle, oracle_args) + print_reproducer(result, + verbosity[args.error_level], + oracle_env.binary_name, + oracle_env.argv) if __name__ == '__main__': main() diff --git a/focaccia/miasm_util.py b/focaccia/miasm_util.py index 24a0e11..83a8778 100644 --- a/focaccia/miasm_util.py +++ b/focaccia/miasm_util.py @@ -203,9 +203,10 @@ def _eval_exprop(expr, state: MiasmSymbolResolver): # the CPUID instruction. Can't do this in an expression simplifier plugin # because the arguments must be concrete. if expr.op == 'x86_cpuid': - assert(len(args) == 2) - assert(isinstance(args[0], ExprInt) and isinstance(args[1], ExprInt)) - return _eval_cpuid(args[0], args[1]) + if args[0].is_int() and args[1].is_int(): + assert(isinstance(args[0], ExprInt) and isinstance(args[1], ExprInt)) + return _eval_cpuid(args[0], args[1]) + return expr return ExprOp(expr.op, *args) |