about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Theofilos Augoustis <theofilos.augoustis@gmail.com> 2025-11-25 15:16:38 +0000
committer Theofilos Augoustis <theofilos.augoustis@gmail.com> 2025-11-25 15:19:13 +0000
commit 56eda951c6518800a18b1beec5e14eac8705cdf9 (patch)
tree d08f3a94e945796516bb96a5f90a4b8657d741fb
parent 293cabc19447c968b2d48752dcc2cdf3a8771b0c (diff)
download focaccia-56eda951c6518800a18b1beec5e14eac8705cdf9.tar.gz
focaccia-56eda951c6518800a18b1beec5e14eac8705cdf9.zip
Offer option for streaming symbolic transforms
-rw-r--r-- pyproject.toml 1
-rw-r--r-- src/focaccia/parser.py 53
-rw-r--r-- src/focaccia/qemu/_qemu_tool.py 8
-rwxr-xr-x src/focaccia/tools/capture_transforms.py 7
-rwxr-xr-x src/focaccia/tools/validate_qemu.py 4
-rw-r--r-- uv.lock 14
6 files changed, 74 insertions, 13 deletions
diff --git a/pyproject.toml b/pyproject.toml
index dd56c1d..eaa8997 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
 	"miasm",
 	"orjson",
 	"brotli",
+	'msgpack',
 	"pycapnp",
 	"setuptools",
 	"python-ptrace",
diff --git a/src/focaccia/parser.py b/src/focaccia/parser.py
index 4df14ec..9c7b283 100644
--- a/src/focaccia/parser.py
+++ b/src/focaccia/parser.py
@@ -2,8 +2,9 @@
 
 import re
 import base64
+import msgpack
 import orjson as json
-from typing import TextIO
+from typing import TextIO, Literal
 
 from .arch import supported_architectures, Arch
 from .snapshot import ProgramState
@@ -30,15 +31,49 @@ def parse_transformations(json_stream: TextIO) -> TraceContainer[SymbolicTransfo
 
     return TraceContainer(strace, env)
 
-def serialize_transformations(transforms: Trace[SymbolicTransform],
-                              out_stream: TextIO):
+def stream_transformation(stream) -> Trace[SymbolicTransform]:
+    unpacker = msgpack.Unpacker(stream, raw=False)
+
+    # First object always contains env
+    header = next(unpacker)
+    env = TraceEnvironment.from_json(header['env'])
+    addresses = header.get('addresses')
+
+    def state_iter():
+        for obj in unpacker:
+            t = SymbolicTransform.from_json(obj['state'])
+            yield t
+
+    return Trace(state_iter(), addresses, env)
+
+def serialize_transformations(trace: Trace[SymbolicTransform],
+                              out_file: str,
+                              out_type: Literal['msgpack', 'json'] = 'json'):
     """Serialize symbolic transformations to a text stream."""
-    data = json.dumps({
-        'env': transforms.env.to_json(),
-        'addrs': transforms.addresses,
-        'states': [t.to_json() for t in transforms],
-    }, option=json.OPT_INDENT_2).decode()
-    out_stream.write(data)
+    if out_type == 'json':
+        with open(out_file, 'w') as out_stream:
+            data = json.dumps({
+                'env': trace.env.to_json(),
+                'addrs': trace.addresses,
+                'states': [t.to_json() for t in trace],
+            }, option=json.OPT_INDENT_2).decode()
+            out_stream.write(data)
+    elif out_type == 'msgpack':
+        with open(out_file, 'wb') as out_stream:
+            pack = msgpack.Packer()
+
+            # Header: env + addresses (list[int])
+            header = {
+                "env": trace.env.to_json(),
+                "addresses": getattr(trace, "addresses", None),
+            }
+            out_stream.write(pack.pack(header))
+
+            # States streamed one by one
+            for state in trace:
+                out_stream.write(pack.pack({"state": state.to_json()}))
+    else:
+        raise NotImplementedError(f'Unable to write transformations to type {out_type}')
 
 def parse_snapshots(json_stream: TextIO) -> TraceContainer[ProgramState]:
     """Parse snapshots from our JSON format."""
diff --git a/src/focaccia/qemu/_qemu_tool.py b/src/focaccia/qemu/_qemu_tool.py
index 42f1628..7f1d108 100644
--- a/src/focaccia/qemu/_qemu_tool.py
+++ b/src/focaccia/qemu/_qemu_tool.py
@@ -261,8 +261,12 @@ def main():
 
     # Read pre-computed symbolic trace
     try:
-        with open(args.symb_trace, 'r') as strace:
-            symb_transforms = parser.parse_transformations(strace)
+        if args.trace_type == 'json':
+            file = open(args.symb_trace, 'r')
+            symb_transforms = parser.parse_transformations(file)
+        else:
+            file = open(args.symb_trace, 'rb')
+            symb_transforms = parser.stream_transformation(file)
     except Exception as e:
         raise Exception(f'Failed to parse state transformations from native trace: {e}')
 
diff --git a/src/focaccia/tools/capture_transforms.py b/src/focaccia/tools/capture_transforms.py
index 268af36..d69c786 100755
--- a/src/focaccia/tools/capture_transforms.py
+++ b/src/focaccia/tools/capture_transforms.py
@@ -51,6 +51,10 @@ def main():
                       type=utils.to_num,
                       help='Set a time limit for executing an instruction symbolically, skip'
                            'instruction when limit is exceeded')
+    prog.add_argument('--out-type',
+                      default='json',
+                      choices=['json', 'msgpack'],
+                      help='Symbolic trace output format')
     args = prog.parse_args()
 
     if args.debug:
@@ -77,6 +81,5 @@ def main():
 
     trace = tracer.trace(time_limit=args.insn_time_limit)
 
-    with open(args.output, 'w') as file:
-        parser.serialize_transformations(trace, file)
+    parser.serialize_transformations(trace, args.output, args.out_type)
 
diff --git a/src/focaccia/tools/validate_qemu.py b/src/focaccia/tools/validate_qemu.py
index 4b5160f..1d713bd 100755
--- a/src/focaccia/tools/validate_qemu.py
+++ b/src/focaccia/tools/validate_qemu.py
@@ -86,6 +86,10 @@ memory, and stepping forward by single instructions.
                       help='GDB binary to invoke.')
     prog.add_argument('--deterministic-log', default=None,
                       help='The directory containing rr traces')
+    prog.add_argument('--trace-type',
+                      default='json',
+                      choices=['msgpack', 'json'],
+                      help='The format of the input symbolic trace')
     return prog
 
 def quoted(s: str) -> str:
diff --git a/uv.lock b/uv.lock
index 8bb04ed..96f0947 100644
--- a/uv.lock
+++ b/uv.lock
@@ -88,6 +88,7 @@ dependencies = [
     { name = "cffi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "cpuid", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "miasm", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "msgpack", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "orjson", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pycapnp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "python-ptrace", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -109,6 +110,7 @@ requires-dist = [
     { name = "cffi" },
     { name = "cpuid", git = "https://github.com/taugoust/cpuid.py.git?rev=master" },
     { name = "miasm", directory = "miasm" },
+    { name = "msgpack" },
     { name = "orjson" },
     { name = "pycapnp" },
     { name = "pyright", marker = "extra == 'dev'" },
@@ -158,6 +160,18 @@ requires-dist = [
 provides-extras = ["dev"]
 
 [[package]]
+name = "msgpack"
+version = "1.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" },
+    { url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" },
+]
+
+[[package]]
 name = "mypy-extensions"
 version = "1.1.0"
 source = { registry = "https://pypi.org/simple" }