diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2024-01-24 12:25:23 +0100 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2024-01-24 12:25:23 +0100 |
| commit | a331aa90709fb26b795cdb649b52b067dc8f8af6 (patch) | |
| tree | 316a83cccf5a59c0245009cb21aa87f408053b84 | |
| parent | c8bb7528b8dc053b45d8c652663479f327273ef8 (diff) | |
| download | focaccia-a331aa90709fb26b795cdb649b52b067dc8f8af6.tar.gz focaccia-a331aa90709fb26b795cdb649b52b067dc8f8af6.zip | |
Refactor comparison and user-facing logic
Co-authored-by: Theofilos Augoustis <theofilos.augoustis@gmail.com> Co-authored-by: Nicola Crivellin <nicola.crivellin98@gmail.com>
| -rwxr-xr-x | focaccia.py | 90 | ||||
| -rw-r--r-- | focaccia/compare.py | 26 | ||||
| -rw-r--r-- | focaccia/lldb_target.py | 2 | ||||
| -rw-r--r-- | focaccia/miasm_util.py | 2 | ||||
| -rw-r--r-- | miasm_test.py | 37 |
5 files changed, 55 insertions, 102 deletions
diff --git a/focaccia.py b/focaccia.py index bbd1317..032615b 100755 --- a/focaccia.py +++ b/focaccia.py @@ -1,19 +1,25 @@ -#! /bin/python3 +#! /usr/bin/env python3 import argparse import platform from typing import Iterable -from focaccia.arch import x86 -from focaccia.compare import compare_simple, compare_symbolic, \ - ErrorSeverity, ErrorTypes +from focaccia.arch import supported_architectures +from focaccia.compare import compare_simple, compare_symbolic, ErrorTypes from focaccia.lldb_target import LLDBConcreteTarget from focaccia.parser import parse_arancini from focaccia.snapshot import ProgramState from focaccia.symbolic import SymbolicTransform, collect_symbolic_trace from focaccia.utils import print_result -def run_native_execution(oracle_program: str, breakpoints: Iterable[int]): +verbosity = { + 'info': ErrorTypes.INFO, + 'warning': ErrorTypes.POSSIBLE, + 'error': ErrorTypes.CONFIRMED, +} + +def collect_concrete_trace(oracle_program: str, breakpoints: Iterable[int]) \ + -> list[ProgramState]: """Gather snapshots from a native execution via an external debugger. :param oracle_program: Program to execute. @@ -22,8 +28,6 @@ def run_native_execution(oracle_program: str, breakpoints: Iterable[int]): :return: A list of snapshots gathered from the execution. """ - assert(platform.machine() == "x86_64") - target = LLDBConcreteTarget(oracle_program) # Set breakpoints @@ -75,19 +79,6 @@ def match_traces(test: list[ProgramState], truth: list[SymbolicTransform]): return test, truth -def parse_inputs(txl_path, program): - # Our architecture - arch = x86.ArchX86() - - with open(txl_path, "r") as txl_file: - txl = parse_arancini(txl_file, arch) - - with open(txl_path, "r") as txl_file: - breakpoints = [state.read_register('PC') for state in txl] - ref = run_native_execution(program, breakpoints) - - return txl, ref - def parse_arguments(): parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference') parser.add_argument('-p', '--program', @@ -96,7 +87,6 @@ def parse_arguments(): help='Path to oracle program') parser.add_argument('-a', '--program-arg', type=str, - required=False, default=[], action='append', help='Arguments to the program specified with --program.') @@ -110,42 +100,52 @@ def parse_arguments(): help='Use an advanced algorithm that uses symbolic' ' execution to determine accurate data' ' transformations. This improves the quality of' - ' generated errors significantly, but may take' - ' more time to run.') + ' generated errors significantly, but will take' + ' more time to complete.') parser.add_argument('--error-level', type=str, - default='verbose', - choices=['verbose', 'errors', 'restricted'], - help='Verbosity of reported errors. \'errors\' reports' - ' everything that might be an error in the' - ' translation, while \'verbose\' may report' - ' additional errors from incomplete input' - ' data, etc. [Default: verbose]') + default='warning', + choices=list(verbosity.keys()), + help='Verbosity of reported errors. \'error\' only' + ' reports mismatches that have been detected as' + ' errors in the translation with certainty.' + ' \'warning\' will report possible errors that' + ' may as well stem from incomplete input data.' + ' \'info\' will report absolutely everything.' + ' [Default: warning]') args = parser.parse_args() return args def main(): - verbosity = { - 'verbose': ErrorTypes.INFO, - 'errors': ErrorTypes.POSSIBLE, - 'restricted': ErrorTypes.CONFIRMED, - } args = parse_arguments() + # Determine the current machine's architecture. The log type must match the + # architecture on which focaccia is executed because focaccia wants to + # execute the reference program concretely. + if platform.machine() not in supported_architectures: + print(f'Machine {platform.machine()} is not supported! Exiting.') + exit(1) + arch = supported_architectures[platform.machine()] + txl_path = args.txl - program = args.program - prog_args = args.program_arg - txl, ref = parse_inputs(txl_path, program) + oracle = args.program + oracle_args = args.program_arg - if args.symbolic: - assert(program is not None) + # Parse reference trace + with open(txl_path, "r") as txl_file: + test_states = parse_arancini(txl_file, arch) - print(f'Tracing {program} symbolically with arguments {prog_args}...') - transforms = collect_symbolic_trace(program, prog_args) - txl, transforms = match_traces(txl, transforms) - result = compare_symbolic(txl, transforms) + # Compare reference trace to a truth + if args.symbolic: + print(f'Tracing {oracle} symbolically with arguments {oracle_args}...') + transforms = collect_symbolic_trace(oracle, oracle_args) + test_states, transforms = match_traces(test_states, transforms) + result = compare_symbolic(test_states, transforms) else: - result = compare_simple(txl, ref) + # Record truth states from a concrete execution of the oracle + breakpoints = [state.read_register('PC') for state in test_states] + truth = collect_concrete_trace(oracle, breakpoints) + result = compare_simple(test_states, truth) print_result(result, verbosity[args.error_level]) diff --git a/focaccia/compare.py b/focaccia/compare.py index 36cd54e..d89a41a 100644 --- a/focaccia/compare.py +++ b/focaccia/compare.py @@ -57,9 +57,9 @@ def _calc_transformation(previous: ProgramState, current: ProgramState): transformation = ProgramState(arch) for reg in arch.regnames: try: - prev_val, cur_val = previous.read_register(reg), current.read_register(reg) - if prev_val is not None and cur_val is not None: - transformation.set_register(reg, cur_val - prev_val) + prev_val = previous.read_register(reg) + cur_val = current.read_register(reg) + transformation.set_register(reg, cur_val - prev_val) except RegisterAccessError: # Register is not set in either state pass @@ -138,12 +138,8 @@ def compare_simple(test_states: list[ProgramState], pc_txl = txl.read_register(PC_REGNAME) pc_truth = truth.read_register(PC_REGNAME) - # The program counter should always be set on a snapshot - assert(pc_truth is not None) - assert(pc_txl is not None) - if pc_txl != pc_truth: - print(f'Unmatched program counter {hex(txl.read_register(PC_REGNAME))}' + print(f'Unmatched program counter {hex(pc_txl)}' f' in translated code!') continue @@ -176,7 +172,7 @@ def _find_register_errors(txl_from: ProgramState, except MemoryAccessError as err: s, e = transform_truth.range return [Error( - ErrorTypes.INCOMPLETE, + ErrorTypes.POSSIBLE, f'Register transformations {hex(s)} -> {hex(e)} depend on' f' {err.mem_size} bytes at memory address {hex(err.mem_addr)}' f' that are not entirely present in the tested state' @@ -280,15 +276,9 @@ def _find_errors_symbolic(txl_from: ProgramState, :param transform_truth: The symbolic transformation that maps the source state to the destination state. """ - if (txl_from.read_register('PC') != transform_truth.range[0]) \ - or (txl_to.read_register('PC') != transform_truth.range[1]): - tstart, tend = transform_truth.range - return [Error(ErrorTypes.POSSIBLE, - f'Program counters of the tested transformation' - f' do not match the truth transformation:' - f' {hex(txl_from.read_register("PC"))} -> {hex(txl_to.read_register("PC"))}' - f' (test) vs. {hex(tstart)} -> {hex(tend)} (truth).' - f' Skipping with no errors.')] + from_pc = txl_from.read_register('PC') + to_pc = txl_to.read_register('PC') + assert((from_pc, to_pc) == transform_truth.range) errors = [] errors.extend(_find_register_errors(txl_from, txl_to, transform_truth)) diff --git a/focaccia/lldb_target.py b/focaccia/lldb_target.py index 05ab66d..903e73d 100644 --- a/focaccia/lldb_target.py +++ b/focaccia/lldb_target.py @@ -178,7 +178,7 @@ class LLDBConcreteTarget: if not err.success: raise ConcreteMemoryError(f'Error when reading {size} bytes at' f' address {hex(addr)}: {err}') - return content + return bytes(reversed(content)) # Convert to big endian def write_memory(self, addr, value: bytes): """Write bytes to memory. diff --git a/focaccia/miasm_util.py b/focaccia/miasm_util.py index 514390d..f43c151 100644 --- a/focaccia/miasm_util.py +++ b/focaccia/miasm_util.py @@ -131,7 +131,7 @@ def _eval_exprmem(expr: ExprMem, state: MiasmConcreteState): return expr assert(len(mem) * 8 == expr.size) - return ExprInt(int.from_bytes(mem, byteorder='little'), expr.size) + return ExprInt(int.from_bytes(mem), expr.size) def _eval_exprcond(expr, state: MiasmConcreteState): """Evaluate an ExprCond using the current state""" diff --git a/miasm_test.py b/miasm_test.py deleted file mode 100644 index 8d5bd9a..0000000 --- a/miasm_test.py +++ /dev/null @@ -1,37 +0,0 @@ -import argparse - -from focaccia.symbolic import collect_symbolic_trace - -def main(): - program = argparse.ArgumentParser() - program.add_argument('binary') - program.add_argument('argv', action='store', nargs=argparse.REMAINDER) - program.add_argument('--start-addr', - help='Instruction at which to start') - args = program.parse_args() - - binary = args.binary - argv = args.argv - - pc = None - if args.start_addr: - try: - pc = int(args.start_addr, 16) - except ValueError: - print(f'Start address must be a hexadecimal number. Exiting.') - exit(1) - - strace = collect_symbolic_trace(binary, argv, pc) - - print(f'--- {len(strace)} instructions traced.') - print(f'--- No new PC found. Exiting.') - -if __name__ == "__main__": - main() - -# TODO: To implement support for unimplemented instructions, add their -# ASM->IR implementations to the `mnemo_func` array in -# `miasm/arch/x86/sem.py:5142`. -# -# For XGETBV, I might have to add the extended control register XCR0 first. -# This might be a nontrivial patch to Miasm. |