diff options
Diffstat (limited to 'parser.py')
| -rw-r--r-- | parser.py | 81 |
1 files changed, 56 insertions, 25 deletions
diff --git a/parser.py b/parser.py index 19fb7c2..391d58a 100644 --- a/parser.py +++ b/parser.py @@ -59,43 +59,59 @@ def serialize_snapshots(snapshots: list[ProgramState], out_stream: TextIO): json.dump(res, out_stream) def parse_qemu(stream: TextIO, arch: Arch) -> list[ProgramState]: + """Parse a QEMU log from a stream. + + :return: A list of parsed program states, in order of occurrence in the + log. + """ states = [] for line in stream: if line.startswith('Trace'): states.append(ProgramState(arch)) continue + if states: + _parse_qemu_line(line, states[-1]) + + return states - line = line.strip() +def _parse_qemu_line(line: str, cur_state: ProgramState): + """Try to parse a single register-assignment line from a QEMU log. - # Remove padding spaces around equality signs - line = re.sub(' =', '=', line) - line = re.sub('= +', '=', line) + Set all registers for which the line specified values in a `ProgramState` + object. - # Standardize register names - line = re.sub('YMM0([0-9])', lambda m: f'YMM{m.group(1)}', line) - line = re.sub('FPR([0-9])', lambda m: f'ST{m.group(1)}', line) + :param line: The log line to parse. + :param cur_state: The state on which to set parsed register values. + """ + line = line.strip() - # Bring each register assignment into a new line - line = re.sub(' ([A-Z0-9]+)=', lambda m: f'\n{m.group(1)}=', line) + # Remove padding spaces around equality signs + line = re.sub(' =', '=', line) + line = re.sub('= +', '=', line) - # Remove all trailing information from register assignments - line = re.sub('^([A-Z0-9]+)=([0-9a-f ]+).*$', - lambda m: f'{m.group(1)}={m.group(2)}', - line, - 0, re.MULTILINE) + # Standardize register names + line = re.sub('YMM0([0-9])', lambda m: f'YMM{m.group(1)}', line) + line = re.sub('FPR([0-9])', lambda m: f'ST{m.group(1)}', line) - # Now parse registers and their values from the resulting lines - lines = line.split('\n') - for line in lines: - split = line.split('=') - if len(split) == 2: - regname, value = split - value = value.replace(' ', '') - regname = arch.to_regname(regname) - if regname is not None: - states[-1].set(regname, int(value, 16)) + # Bring each register assignment into a new line + line = re.sub(' ([A-Z0-9]+)=', lambda m: f'\n{m.group(1)}=', line) - return states + # Remove all trailing information from register assignments + line = re.sub('^([A-Z0-9]+)=([0-9a-f ]+).*$', + lambda m: f'{m.group(1)}={m.group(2)}', + line, + 0, re.MULTILINE) + + # Now parse registers and their values from the resulting lines + lines = line.split('\n') + for line in lines: + split = line.split('=') + if len(split) == 2: + regname, value = split + value = value.replace(' ', '') + regname = cur_state.arch.to_regname(regname) + if regname is not None: + cur_state.set(regname, int(value, 16)) def parse_arancini(stream: TextIO, arch: Arch) -> list[ProgramState]: aliases = { @@ -123,3 +139,18 @@ def parse_arancini(stream: TextIO, arch: Arch) -> list[ProgramState]: states[-1].set(regname, int(value, 16)) return states + +if __name__ == "__main__": + from arch import x86 + with open('qemu.log', 'r') as file: + states = parse_qemu(file, x86.ArchX86()) + print(f'Parsed {len(states)} states from QEMU log.') + with open('dump.qemu', 'w') as file: + serialize_snapshots(states, file) + + with open('emulator-log.txt', 'r') as file: + states = parse_arancini(file, x86.ArchX86()) + print(f'Parsed {len(states)} states from Arancini log.') + with open('dump.arancini', 'w') as file: + serialize_snapshots(states, file) + exit(0) |