diff options
Diffstat (limited to 'compare.py')
| -rwxr-xr-x | compare.py | 234 |
1 files changed, 234 insertions, 0 deletions
diff --git a/compare.py b/compare.py new file mode 100755 index 0000000..36cc9c7 --- /dev/null +++ b/compare.py @@ -0,0 +1,234 @@ +#! /bin/python3 +import re +import sys +import shutil +import argparse +from functools import partial as bind + +from utils import check_version +from utils import print_separator + +from run import Runner + +class ContextBlock: + regnames = ['PC', + 'RAX', + 'RBX', + 'RCX', + 'RDX', + 'RSI', + 'RDI', + 'RBP', + 'RSP', + 'R8', + 'R9', + 'R10', + 'R11', + 'R12', + 'R13', + 'R14', + 'R15', + 'flag ZF', + 'flag CF', + 'flag OF', + 'flag SF', + 'flag PF', + 'flag DF'] + + def __init__(self): + self.regs = {reg: None for reg in ContextBlock.regnames} + + def set(self, idx: int, value: int): + self.regs[list(self.regs.keys())[idx]] = value + + def __repr__(self): + return self.regs.__repr__() + +class Constructor: + def __init__(self, structure: dict): + self.cblocks = [] + self.structure = structure + self.patterns = list(self.structure.keys()) + + def match(self, line: str): + # find patterns that match it + regex = re.compile("|".join(self.patterns)) + match = regex.match(line) + + idx = self.patterns.index(match.group(0)) if match else 0 + + pattern = self.patterns[idx] + register = ContextBlock.regnames[idx] + + return register, self.structure[pattern](line) + + def add(self, key: str, value: int): + if key == 'PC': + self.cblocks.append(ContextBlock()) + + if self.cblocks[-1].regs[key] != None: + raise RuntimeError("Reassigning register") + + self.cblocks[-1].regs[key] = value + +def parse(lines: list, labels: list): + ctor = Constructor(labels) + + regex = re.compile("|".join(ctor.patterns)) + lines = [l for l in lines if regex.match(l) is not None] + + for line in lines: + key, value = ctor.match(line) + ctor.add(key, value) + + return ctor.cblocks + +def get_labels(): + split_value = lambda x,i: int(x.split()[i], 16) + + split_first = bind(split_value, i=1) + split_second = bind(split_value, i=2) + + split_equal = lambda x,i: int(x.split('=')[i], 16) + labels = {'INVOKE': bind(split_equal, i=1), + 'RAX': split_first, + 'RBX': split_first, + 'RCX': split_first, + 'RDX': split_first, + 'RSI': split_first, + 'RDI': split_first, + 'RBP': split_first, + 'RSP': split_first, + 'R8': split_first, + 'R9': split_first, + 'R10': split_first, + 'R11': split_first, + 'R12': split_first, + 'R13': split_first, + 'R14': split_first, + 'R15': split_first, + 'flag ZF': split_second, + 'flag CF': split_second, + 'flag OF': split_second, + 'flag SF': split_second, + 'flag PF': split_second, + 'flag DF': split_second} + return labels + +def equivalent(val1, val2): + return val1 == val2 + +def verify(translation: ContextBlock, reference: ContextBlock): + if translation.regs["PC"] != reference.regs["PC"]: + return 1 + + print_separator() + print(f'For PC={hex(translation.regs["PC"])}') + print_separator() + for el1 in translation.regs.keys(): + for el2 in reference.regs.keys(): + if el1 != el2: + continue + + if translation.regs[el1] is None: + print(f'Element not available in translation: {el1}') + continue + + if reference.regs[el2] is None: + print(f'Element not available in reference: {el2}') + continue + + if not equivalent(translation.regs[el1], reference.regs[el2]): + print(f'Difference for {el1}: {hex(translation.regs[el1])} != {hex(reference.regs[el2])}') + return 0 + +def compare(txl: list, native: list, stats: bool = False): + txl = parse(txl, get_labels()) + native = parse(native, get_labels()) + + if len(txl) != len(native): + print(f'Different number of blocks discovered translation: {len(txl)} vs. ' + f'reference: {len(native)}', file=sys.stderr) + + unmatched_pcs = {} + for translation, reference in zip(txl, native): + if verify(translation, reference) == 1: + # TODO: add verbose output + print_separator(stream=sys.stderr) + print(f'No match for PC {hex(translation.regs["PC"])}', file=sys.stderr) + if translation.regs['PC'] not in unmatched_pcs: + unmatched_pcs[translation.regs['PC']] = 0 + unmatched_pcs[translation.regs['PC']] += 1 + + if stats: + print_separator() + print('Statistics:') + print_separator() + + for pc in unmatched_pcs: + print(f'PC {hex(pc)} unmatched {unmatched_pcs[pc]} times') + return 0 + +def read_logs(txl_path, native_path, program): + txl = [] + with open(txl_path, "r") as txl_file: + txl = txl_file.readlines() + + native = [] + if program is not None: + runner = Runner(txl, program) + native = runner.run() + else: + with open(native_path, "r") as native_file: + native = native_file.readlines() + + return txl, native + +def parse_arguments(): + parser = argparse.ArgumentParser(description='Comparator for emulator logs to reference') + parser.add_argument('-p', '--program', + type=str, + help='Path to oracle program') + parser.add_argument('-r', '--ref', + type=str, + required=True, + help='Path to the reference log (gathered with run.sh)') + parser.add_argument('-t', '--txl', + type=str, + required=True, + help='Path to the translation log (gathered via Arancini)') + parser.add_argument('-s', '--stats', + action='store_true', + default='store_false', + help='Run statistics on comparisons') + parser.add_argument('-v', '--verbose', + action='store_true', + default='store_true', + help='Path to oracle program') + args = parser.parse_args() + return args + +if __name__ == "__main__": + check_version('3.7') + + args = parse_arguments() + + txl_path = args.txl + native_path = args.ref + program = args.program + + stats = args.stats + verbose = args.verbose + + if program is None and native_path is None: + raise ValueError('Either program or path to native file must be' + 'provided') + + txl, native = read_logs(txl_path, native_path, program) + + if program != None and native_path != None: + with open(native_path, 'w') as w: + w.write(''.join(native)) + + compare(txl, native, stats) + |