about summary refs log tree commit diff stats
path: root/src/miasm/core/sembuilder.py
diff options
context:
space:
mode:
author Theofilos Augoustis <theofilos.augoustis@gmail.com> 2025-10-14 09:09:29 +0000
committer Theofilos Augoustis <theofilos.augoustis@gmail.com> 2025-10-14 09:09:29 +0000
commit 579cf1d03fb932083e6317967d1613d5c2587fb6 (patch)
tree 629f039935382a2a7391bce9253f6c9968159049 /src/miasm/core/sembuilder.py
parent 51c15d3ea2e16d4fc5f0f01a3b9befc66b1f982e (diff)
download focaccia-miasm-ta/nix.tar.gz
focaccia-miasm-ta/nix.zip
Convert to src-layout ta/nix
Diffstat (limited to 'src/miasm/core/sembuilder.py')
-rw-r--r-- src/miasm/core/sembuilder.py 341
1 files changed, 341 insertions, 0 deletions
diff --git a/src/miasm/core/sembuilder.py b/src/miasm/core/sembuilder.py
new file mode 100644
index 00000000..9843ee6a
--- /dev/null
+++ b/src/miasm/core/sembuilder.py
@@ -0,0 +1,341 @@
+"Helper to quickly build instruction's semantic side effects"
+
+import inspect
+import ast
+import re
+
+from future.utils import PY3
+
+import miasm.expression.expression as m2_expr
+from miasm.ir.ir import IRBlock, AssignBlock
+
+
+class MiasmTransformer(ast.NodeTransformer):
+    """AST visitor translating DSL to Miasm expression
+
+    iX(Y)         -> ExprInt(Y, X)
+    X if Y else Z -> ExprCond(Y, X, Z)
+    'X'(Y)        -> ExprOp('X', Y)
+    ('X' % Y)(Z)  -> ExprOp('X' % Y, Z)
+    {a, b}        -> ExprCompose(a, b)
+
+    The `parse_mem` pattern is kept as a class attribute, but no visitor of
+    this class uses it: `memX[Y]` is left untouched by the transformer.
+    """
+
+    # Parsers
+    parse_integer = re.compile(r"^i([0-9]+)$")
+    parse_mem = re.compile(r"^mem([0-9]+)$")
+
+    # Helpers
+    @staticmethod
+    def _is_str_literal(node):
+        """Return True if @node is the AST of a string literal"""
+        # Python < 3.8 parses string literals to `ast.Str`; newer versions
+        # use `ast.Constant`, and `ast.Str` no longer exists since Python
+        # 3.14. The legacy class is matched by name so that the `ast.Str`
+        # attribute is never read.
+        if type(node).__name__ == "Str":
+            return True
+        constant = getattr(ast, "Constant", None)
+        return (constant is not None and
+                isinstance(node, constant) and
+                isinstance(node.value, str))
+
+    @staticmethod
+    def _int_literal(value):
+        """Return the AST of the integer literal @value"""
+        # `ast.Num` no longer exists since Python 3.14; use `ast.Constant`
+        # wherever the running interpreter provides it.
+        if hasattr(ast, "Constant"):
+            return ast.Constant(value=value)
+        return ast.Num(n=value)
+
+    @staticmethod
+    def _build_call(name, args):
+        """Return the AST of the call @name(@args...)
+        @name: identifier of the callee
+        @args: list of AST nodes used as positional arguments"""
+        # Only real `ast.Call` fields are set: 'starargs' and 'kwargs' are
+        # optional on old interpreters and unknown on recent ones, where
+        # passing unknown keywords to node constructors is deprecated.
+        return ast.Call(func=ast.Name(id=name, ctx=ast.Load()),
+                        args=args,
+                        keywords=[])
+
+    # Visitors
+    def visit_Call(self, node):
+        """iX(Y) -> ExprInt(Y, X),
+        'X'(Y) -> ExprOp('X', Y), ('X' % Y)(Z) -> ExprOp('X' % Y, Z)"""
+
+        # Recursive visit
+        node = self.generic_visit(node)
+        func = node.func
+
+        if isinstance(func, ast.Name):
+            # iX(Y) -> ExprInt(Y, X)
+            integer = self.parse_integer.search(func.id)
+            if integer is not None:
+                # Rename the callee and pass the size as a trailing argument
+                func.id = "ExprInt"
+                node.args.append(self._int_literal(int(integer.group(1))))
+
+        elif (self._is_str_literal(func) or
+              (isinstance(func, ast.BinOp) and
+               isinstance(func.op, ast.Mod) and
+               self._is_str_literal(func.left))):
+            # 'op'(args...) -> ExprOp('op', args...)
+            # ('op' % (fmt))(args...) -> ExprOp('op' % (fmt), args...)
+            node.func = ast.Name(id="ExprOp", ctx=ast.Load())
+            node.args.insert(0, func)
+
+        return node
+
+    def visit_IfExp(self, node):
+        """X if Y else Z -> ExprCond(Y, X, Z)"""
+        # Recursive visit: the three children are already translated once
+        # this returns, so they must not be visited a second time.
+        node = self.generic_visit(node)
+
+        # Build the new ExprCond
+        return self._build_call('ExprCond',
+                                [node.test, node.body, node.orelse])
+
+    def visit_Set(self, node):
+        "{a, b} -> ExprCompose(a, b)"
+        if not node.elts:
+            return node
+
+        # Recursive visit
+        node = self.generic_visit(node)
+
+        return self._build_call('ExprCompose', node.elts)
+
+if PY3:
+    def get_arg_name(name):
+        """Return the identifier carried by the argument node @name
+        (an `ast.arg` under Python 3)"""
+        return name.arg
+    def gen_arg(name, ctx):
+        """Return the AST node declaring a function argument named @name
+        @ctx: unused under Python 3; accepted so both variants of this
+        helper share the same signature"""
+        # `ast.arg` has no 'ctx' field, and passing unknown keywords to
+        # node constructors is deprecated on recent Python versions.
+        return ast.arg(arg=name, annotation=None)
+else:
+    def get_arg_name(name):
+        """Return the identifier carried by the argument node @name
+        (an `ast.Name` under Python 2)"""
+        return name.id
+    def gen_arg(name, ctx):
+        """Return the AST node declaring a function argument named @name
+        @ctx: expression context attached to the generated `ast.Name`"""
+        return ast.Name(id=name, ctx=ctx)
+
+
+class SemBuilder(object):
+    """Helper for building instruction's semantic side effects method
+
+    This class provides a decorator @parse to use on them.
+    The context in which the function will be parsed must be supplied on
+    instantiation
+    """
+
+    def __init__(self, ctx):
+        """Create a SemBuilder
+        @ctx: context dictionary used during parsing
+        """
+        # Init
+        self.transformer = MiasmTransformer()
+        self._ctx = dict(m2_expr.__dict__)
+        self._ctx["IRBlock"] = IRBlock
+        self._ctx["AssignBlock"] = AssignBlock
+        self._functions = {}
+        # Names bound by plain Python assignments in the function being
+        # parsed; `parse` resets it for every decorated function
+        self._local_ctx = {}
+
+        # Update context (user supplied names override the defaults above)
+        self._ctx.update(ctx)
+
+    @property
+    def functions(self):
+        """Return a dictionary name -> func of parsed functions"""
+        return self._functions.copy()
+
+    @staticmethod
+    def _is_str_literal(node):
+        """Return True if @node is the AST of a string literal"""
+        # Python < 3.8 parses string literals to `ast.Str`; newer versions
+        # use `ast.Constant`, and `ast.Str` no longer exists since Python
+        # 3.14. The legacy class is matched by name so that the `ast.Str`
+        # attribute is never read.
+        if type(node).__name__ == "Str":
+            return True
+        constant = getattr(ast, "Constant", None)
+        return (constant is not None and
+                isinstance(node, constant) and
+                isinstance(node.value, str))
+
+    @staticmethod
+    def _build_call(name, args):
+        """Return the AST of the call @name(@args...)
+        @name: identifier of the callee
+        @args: list of AST nodes used as positional arguments"""
+        # Only real `ast.Call` fields are set: 'starargs' and 'kwargs' are
+        # optional on old interpreters and unknown on recent ones, where
+        # passing unknown keywords to node constructors is deprecated.
+        return ast.Call(func=ast.Name(id=name, ctx=ast.Load()),
+                        args=args,
+                        keywords=[])
+
+    @staticmethod
+    def _create_labels(loc_else=False):
+        """Return the AST standing for label creations
+        @loc_else (optional): if set, create a label 'loc_else'"""
+        statements = [
+            "loc_end = ir.get_next_loc_key(instr)",
+            "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)",
+            "loc_if = ir.loc_db.add_location()",
+            "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)",
+        ]
+        if loc_else:
+            statements += [
+                "loc_else = ir.loc_db.add_location()",
+                "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)",
+            ]
+
+        out = []
+        for statement in statements:
+            out += ast.parse(statement).body
+        return out
+
+    def _parse_body(self, body, argument_names):
+        """Recursive function transforming a @body to a block expression
+        @body: list of AST statements
+        @argument_names: names of the arguments of the parsed function
+        Return a couple (blocks, real_body):
+         - blocks: a list of blocks, ie list of affblock, ie list of
+           ExprAssign (AST); the blocks of an `if` are replaced by the AST
+           of a call to IRBlock
+         - real_body: AST to append to body (real python statements)
+        Raise RuntimeError on nested `if` and on unsupported statements"""
+
+        # Init
+        ## Real instructions
+        real_body = []
+        ## Final blocks
+        blocks = [[[]]]
+
+        for statement in body:
+
+            if isinstance(statement, ast.Assign):
+                src = self.transformer.visit(statement.value)
+                dst = self.transformer.visit(statement.targets[0])
+
+                if (isinstance(dst, ast.Name) and
+                    dst.id not in argument_names and
+                    dst.id not in self._ctx and
+                    dst.id not in self._local_ctx):
+
+                    # Real variable declaration: first assignment to a name
+                    # which is neither an argument nor a known symbol
+                    statement.value = src
+                    real_body.append(statement)
+                    self._local_ctx[dst.id] = src
+                    continue
+
+                # The target becomes an argument of ExprAssign, so it is
+                # read instead of written
+                dst.ctx = ast.Load()
+
+                res = self._build_call('ExprAssign', [dst, src])
+
+                blocks[-1][-1].append(res)
+
+            elif (isinstance(statement, ast.Expr) and
+                  self._is_str_literal(statement.value)):
+                # String (docstring, comment, ...) -> keep it
+                real_body.append(statement)
+
+            elif isinstance(statement, ast.If):
+                # Create jumps : ir.IRDst = loc_if if cond else loc_end
+                # if .. else .. are also handled
+                cond = statement.test
+                real_body += self._create_labels(loc_else=True)
+
+                loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load())
+                loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load())
+                loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \
+                           if statement.orelse else loc_end
+                dst = self._build_call('ExprCond', [cond, loc_if, loc_else])
+
+                if (isinstance(cond, ast.UnaryOp) and
+                    isinstance(cond.op, ast.Not)):
+                    ## if not cond -> switch exprCond
+                    dst.args[1:] = dst.args[1:][::-1]
+                    dst.args[0] = cond.operand
+
+                IRDst = ast.Attribute(value=ast.Name(id='ir',
+                                                     ctx=ast.Load()),
+                                      attr='IRDst', ctx=ast.Load())
+                loc_db = ast.Attribute(value=ast.Name(id='ir',
+                                                      ctx=ast.Load()),
+                                       attr='loc_db', ctx=ast.Load())
+                blocks[-1][-1].append(self._build_call('ExprAssign',
+                                                       [IRDst, dst]))
+
+                # Create the new blocks
+                elements = [(statement.body, 'loc_if')]
+                if statement.orelse:
+                    elements.append((statement.orelse, 'loc_else'))
+                for content, loc_name in elements:
+                    sub_blocks, sub_body = self._parse_body(content,
+                                                            argument_names)
+                    if len(sub_blocks) > 1:
+                        raise RuntimeError("Imbricated if unimplemented")
+
+                    ## Close the last block
+                    jmp_end = self._build_call('ExprAssign',
+                                               [IRDst, loc_end])
+                    sub_blocks[-1][-1].append(jmp_end)
+
+                    instr = ast.Name(id='instr', ctx=ast.Load())
+                    effects = ast.List(elts=sub_blocks[-1][-1],
+                                       ctx=ast.Load())
+                    assignblk = self._build_call('AssignBlock',
+                                                 [effects, instr])
+
+                    ## Replace the block with a call to 'IRBlock'
+                    loc_if_name = ast.Name(id=loc_name, ctx=ast.Load())
+
+                    assignblks = ast.List(elts=[assignblk],
+                                          ctx=ast.Load())
+
+                    sub_blocks[-1] = self._build_call('IRBlock',
+                                                      [loc_db,
+                                                       loc_if_name,
+                                                       assignblks])
+                    blocks += sub_blocks
+                    real_body += sub_body
+
+                # Prepare a new block for following statement
+                blocks.append([[]])
+
+            else:
+                # TODO: real var, +=, /=, -=, <<=, >>=, if/else, ...
+                raise RuntimeError("Unimplemented %s" % statement)
+
+        return blocks, real_body
+
+    def parse(self, func):
+        """Function decorator, returning a correct method from a pseudo-Python
+        one
+        @func: function written with the DSL
+        The generated function takes 'ir' and 'instr' followed by the
+        arguments of @func, and returns a couple made of the list of
+        ExprAssign of the current instruction and the list of extra blocks.
+        It is also registered in `functions` under the name of @func"""
+
+        # Get the function AST
+        parsed = ast.parse(inspect.getsource(func))
+        fc_ast = parsed.body[0]
+        argument_names = [get_arg_name(name) for name in fc_ast.args.args]
+
+        # Init local cache
+        self._local_ctx = {}
+
+        # Translate (blocks[0][0] is the current instr)
+        blocks, body = self._parse_body(fc_ast.body, argument_names)
+
+        # Build the new function
+        fc_ast.args.args[0:0] = [
+            gen_arg('ir', ast.Param()),
+            gen_arg('instr', ast.Param())
+        ]
+        cur_instr = blocks[0][0]
+        if not blocks[-1][0]:
+            ## Last block can be empty
+            blocks.pop()
+        other_blocks = blocks[1:]
+        body.append(ast.Return(value=ast.Tuple(elts=[ast.List(elts=cur_instr,
+                                                              ctx=ast.Load()),
+                                                     ast.List(elts=other_blocks,
+                                                              ctx=ast.Load())],
+                                               ctx=ast.Load())))
+
+        ret = ast.parse('')
+        ret.body = [ast.FunctionDef(name=fc_ast.name,
+                                    args=fc_ast.args,
+                                    body=body,
+                                    decorator_list=[])]
+
+        # To display the generated function, use codegen.to_source
+        # codegen: https://github.com/andreif/codegen
+
+        # Compile according to the context
+        fixed = ast.fix_missing_locations(ret)
+        codeobj = compile(fixed, '<string>', 'exec')
+        ctx = self._ctx.copy()
+        # The code object is built from the source of the decorated
+        # function itself; running it defines the new function in `ctx`
+        eval(codeobj, ctx)
+
+        # Get the function back
+        generated = ctx[fc_ast.name]
+        self._functions[fc_ast.name] = generated
+        return generated