about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- parser.py 81
1 files changed, 56 insertions, 25 deletions
diff --git a/parser.py b/parser.py
index 19fb7c2..391d58a 100644
--- a/parser.py
+++ b/parser.py
@@ -59,43 +59,59 @@ def serialize_snapshots(snapshots: list[ProgramState], out_stream: TextIO):
     json.dump(res, out_stream)
 
 def parse_qemu(stream: TextIO, arch: Arch) -> list[ProgramState]:
+    """Parse a QEMU log from a stream.
+
+    :return: A list of parsed program states, in order of occurrence in the
+             log.
+    """
     states = []
     for line in stream:
         if line.startswith('Trace'):
             states.append(ProgramState(arch))
             continue
+        if states:
+            _parse_qemu_line(line, states[-1])
+
+    return states
 
-        line = line.strip()
+def _parse_qemu_line(line: str, cur_state: ProgramState):
+    """Try to parse a single register-assignment line from a QEMU log.
 
-        # Remove padding spaces around equality signs
-        line = re.sub(' =', '=', line)
-        line = re.sub('= +', '=', line)
+    Set all registers for which the line specified values in a `ProgramState`
+    object.
 
-        # Standardize register names
-        line = re.sub('YMM0([0-9])',   lambda m: f'YMM{m.group(1)}', line)
-        line = re.sub('FPR([0-9])',    lambda m: f'ST{m.group(1)}', line)
+    :param line:      The log line to parse.
+    :param cur_state: The state on which to set parsed register values.
+    """
+    line = line.strip()
 
-        # Bring each register assignment into a new line
-        line = re.sub(' ([A-Z0-9]+)=', lambda m: f'\n{m.group(1)}=', line)
+    # Remove padding spaces around equality signs
+    line = re.sub(' =', '=', line)
+    line = re.sub('= +', '=', line)
 
-        # Remove all trailing information from register assignments
-        line = re.sub('^([A-Z0-9]+)=([0-9a-f ]+).*$',
-                      lambda m: f'{m.group(1)}={m.group(2)}',
-                      line,
-                      0, re.MULTILINE)
+    # Standardize register names
+    line = re.sub('YMM0([0-9])',   lambda m: f'YMM{m.group(1)}', line)
+    line = re.sub('FPR([0-9])',    lambda m: f'ST{m.group(1)}', line)
 
-        # Now parse registers and their values from the resulting lines
-        lines = line.split('\n')
-        for line in lines:
-            split = line.split('=')
-            if len(split) == 2:
-                regname, value = split
-                value = value.replace(' ', '')
-                regname = arch.to_regname(regname)
-                if regname is not None:
-                    states[-1].set(regname, int(value, 16))
+    # Bring each register assignment into a new line
+    line = re.sub(' ([A-Z0-9]+)=', lambda m: f'\n{m.group(1)}=', line)
 
-    return states
+    # Remove all trailing information from register assignments
+    line = re.sub('^([A-Z0-9]+)=([0-9a-f ]+).*$',
+                  lambda m: f'{m.group(1)}={m.group(2)}',
+                  line,
+                  0, re.MULTILINE)
+
+    # Now parse registers and their values from the resulting lines
+    lines = line.split('\n')
+    for line in lines:
+        split = line.split('=')
+        if len(split) == 2:
+            regname, value = split
+            value = value.replace(' ', '')
+            regname = cur_state.arch.to_regname(regname)
+            if regname is not None:
+                cur_state.set(regname, int(value, 16))
 
 def parse_arancini(stream: TextIO, arch: Arch) -> list[ProgramState]:
     aliases = {
@@ -123,3 +139,18 @@ def parse_arancini(stream: TextIO, arch: Arch) -> list[ProgramState]:
                 states[-1].set(regname, int(value, 16))
 
     return states
+
+if __name__ == "__main__":
+    from arch import x86
+    with open('qemu.log', 'r') as file:
+        states = parse_qemu(file, x86.ArchX86())
+        print(f'Parsed {len(states)} states from QEMU log.')
+    with open('dump.qemu', 'w') as file:
+        serialize_snapshots(states, file)
+
+    with open('emulator-log.txt', 'r') as file:
+        states = parse_arancini(file, x86.ArchX86())
+        print(f'Parsed {len(states)} states from Arancini log.')
+    with open('dump.arancini', 'w') as file:
+        serialize_snapshots(states, file)
+    exit(0)