diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
| commit | 579cf1d03fb932083e6317967d1613d5c2587fb6 (patch) | |
| tree | 629f039935382a2a7391bce9253f6c9968159049 /src/miasm/core/sembuilder.py | |
| parent | 51c15d3ea2e16d4fc5f0f01a3b9befc66b1f982e (diff) | |
| download | focaccia-miasm-ta/nix.tar.gz focaccia-miasm-ta/nix.zip | |
Convert to src-layout ta/nix
Diffstat (limited to 'src/miasm/core/sembuilder.py')
| -rw-r--r-- | src/miasm/core/sembuilder.py | 341 |
1 files changed, 341 insertions, 0 deletions
"Helper to quickly build instruction's semantic side effects"

import inspect
import ast
import re
import sys
import textwrap

from future.utils import PY3

import miasm.expression.expression as m2_expr
from miasm.ir.ir import IRBlock, AssignBlock


# ast.Str / ast.Num are deprecated aliases of ast.Constant since Python 3.8
# (from that version on the parser only emits ast.Constant) and are removed in
# Python 3.14. Older interpreters, Python 2 included, only produce the legacy
# node classes, hence the version switch.
if sys.version_info >= (3, 8):
    def _is_str_literal(node):
        "Return True if @node is the AST of a string literal"
        return isinstance(node, ast.Constant) and isinstance(node.value, str)

    def _int_literal(value):
        "Return the AST node standing for the integer literal @value"
        return ast.Constant(value=value)
else:
    def _is_str_literal(node):
        "Return True if @node is the AST of a string literal"
        return isinstance(node, ast.Str)

    def _int_literal(value):
        "Return the AST node standing for the integer literal @value"
        return ast.Num(n=value)


def _build_call(func_name, args):
    """Return the AST standing for the call `func_name(*args)`
    @func_name: name of the callee, resolved when the generated code runs
    @args: list of AST expressions used as positional arguments

    Only the fields shared by every supported Python version are set: the
    legacy 'starargs' / 'kwargs' fields are optional where they exist, and are
    not fields of ast.Call anymore on recent Python 3.
    """
    return ast.Call(func=ast.Name(id=func_name, ctx=ast.Load()),
                    args=args,
                    keywords=[])


class MiasmTransformer(ast.NodeTransformer):
    """AST visitor translating DSL to Miasm expression

    iX(Y) -> ExprInt(Y, X)
    X if Y else Z -> ExprCond(Y, X, Z)
    'X'(Y) -> ExprOp('X', Y)
    ('X' % Y)(Z) -> ExprOp('X' % Y, Z)
    {a, b} -> ExprCompose(a, b)

    NOTE: the `parse_mem` pattern (memX) is kept as a class attribute, but no
    visitor of this class rewrites `memX[Y]`.
    """

    # Parsers
    parse_integer = re.compile(r"^i([0-9]+)$")
    parse_mem = re.compile(r"^mem([0-9]+)$")

    # Visitors
    def visit_Call(self, node):
        """iX(Y) -> ExprInt(Y, X),
        'X'(Y) -> ExprOp('X', Y), ('X' % Y)(Z) -> ExprOp('X' % Y, Z)"""

        # Recursive visit
        node = self.generic_visit(node)
        func = node.func

        if isinstance(func, ast.Name):
            # iX(Y) -> ExprInt(Y, X)
            integer = self.parse_integer.search(func.id)
            if integer is not None:
                size = int(integer.group(1))
                # Rename the callee and pass the size as last argument
                func.id = "ExprInt"
                node.args.append(_int_literal(size))

        elif (_is_str_literal(func) or
              (isinstance(func, ast.BinOp) and
               isinstance(func.op, ast.Mod) and
               _is_str_literal(func.left))):
            # 'op'(args...) -> ExprOp('op', args...)
            # ('op' % (fmt))(args...) -> ExprOp('op' % (fmt), args...)
            # The former callee expression becomes the first argument
            node.func = ast.Name(id="ExprOp", ctx=ast.Load())
            node.args[0:0] = [func]

        return node

    def visit_IfExp(self, node):
        """X if Y else Z -> ExprCond(Y, X, Z)"""
        # Recursive visit: test / body / orelse are already translated once
        # this returns, so they must not be visited a second time
        node = self.generic_visit(node)

        # Build the new ExprCond
        return _build_call('ExprCond', [node.test, node.body, node.orelse])

    def visit_Set(self, node):
        "{a, b} -> ExprCompose(a, b)"
        if not node.elts:
            return node

        # Recursive visit
        node = self.generic_visit(node)

        return _build_call('ExprCompose', node.elts)


if PY3:
    def get_arg_name(name):
        "Return the name of the function argument node @name"
        return name.arg

    def gen_arg(name, ctx=None):
        """Return the AST node declaring a function argument named @name
        @ctx: unused on Python 3 (ast.arg has no 'ctx' field), kept so that
        both variants of this helper share the same signature"""
        return ast.arg(arg=name, annotation=None)
else:
    def get_arg_name(name):
        "Return the name of the function argument node @name"
        return name.id

    def gen_arg(name, ctx=None):
        """Return the AST node declaring a function argument named @name
        @ctx (optional): expression context of the node, ast.Param() if
        omitted"""
        if ctx is None:
            ctx = ast.Param()
        return ast.Name(id=name, ctx=ctx)


class SemBuilder(object):
    """Helper for building instruction's semantic side effects method

    This class provides a decorator @parse to use on them.
    The context in which the function will be parsed must be supplied on
    instantiation
    """

    def __init__(self, ctx):
        """Create a SemBuilder
        @ctx: context dictionary used during parsing
        """
        # Init
        self.transformer = MiasmTransformer()
        self._ctx = dict(m2_expr.__dict__)
        self._ctx["IRBlock"] = IRBlock
        self._ctx["AssignBlock"] = AssignBlock
        self._functions = {}
        # Names of the real Python variables declared by the function being
        # parsed; reset at the beginning of each call to `parse`
        self._local_ctx = {}

        # Update context (user supplied names take precedence)
        self._ctx.update(ctx)

    @property
    def functions(self):
        """Return a dictionary name -> func of parsed functions"""
        return self._functions.copy()

    @staticmethod
    def _create_labels(loc_else=False):
        """Return the AST standing for label creations
        @loc_else (optional): if set, create a label 'loc_else'

        The returned statements refer to the names 'ir', 'instr' and
        'ExprLoc', which are resolved when the generated function runs.
        """
        sources = [
            "loc_end = ir.get_next_loc_key(instr)",
            "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)",
            "loc_if = ir.loc_db.add_location()",
            "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)",
        ]
        if loc_else:
            sources += [
                "loc_else = ir.loc_db.add_location()",
                "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)",
            ]

        out = []
        for source in sources:
            out += ast.parse(source).body
        return out

    def _parse_body(self, body, argument_names):
        """Recursive function transforming a @body to a block expression
        @body: list of AST statements
        @argument_names: names of the arguments of the function being parsed
        Return a tuple (blocks, real_body):
        - blocks: a list of blocks, ie list of affblock, ie list of
          ExprAssign (AST); blocks created for an 'if' are replaced by the AST
          of a call to IRBlock
        - real_body: AST to append to body (real python statements)
        Raise RuntimeError on an unsupported statement or on nested 'if'
        """

        # Init
        ## Real instructions
        real_body = []
        ## Final blocks
        blocks = [[[]]]

        for statement in body:

            if isinstance(statement, ast.Assign):
                src = self.transformer.visit(statement.value)
                dst = self.transformer.visit(statement.targets[0])

                if (isinstance(dst, ast.Name) and
                    dst.id not in argument_names and
                    dst.id not in self._ctx and
                    dst.id not in self._local_ctx):

                    # Real variable declaration
                    statement.value = src
                    real_body.append(statement)
                    self._local_ctx[dst.id] = src
                    continue

                # The destination is now read as an argument of ExprAssign
                dst.ctx = ast.Load()

                blocks[-1][-1].append(_build_call('ExprAssign', [dst, src]))

            elif (isinstance(statement, ast.Expr) and
                  _is_str_literal(statement.value)):
                # String (docstring, comment, ...) -> keep it
                real_body.append(statement)

            elif isinstance(statement, ast.If):
                # Create jumps : ir.IRDst = loc_if if cond else loc_end
                # if .. else .. are also handled
                # NOTE: the test expression is used as is, it does not go
                # through the transformer
                cond = statement.test
                real_body += self._create_labels(loc_else=True)

                loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load())
                loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load())
                loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \
                           if statement.orelse else loc_end
                dst = _build_call('ExprCond', [cond, loc_if, loc_else])

                if (isinstance(cond, ast.UnaryOp) and
                    isinstance(cond.op, ast.Not)):
                    ## if not cond -> switch exprCond
                    dst.args[1:] = dst.args[1:][::-1]
                    dst.args[0] = cond.operand

                IRDst = ast.Attribute(value=ast.Name(id='ir',
                                                     ctx=ast.Load()),
                                      attr='IRDst', ctx=ast.Load())
                loc_db = ast.Attribute(value=ast.Name(id='ir',
                                                      ctx=ast.Load()),
                                       attr='loc_db', ctx=ast.Load())
                blocks[-1][-1].append(_build_call('ExprAssign', [IRDst, dst]))

                # Create the new blocks
                elements = [(statement.body, 'loc_if')]
                if statement.orelse:
                    elements.append((statement.orelse, 'loc_else'))
                for content, loc_name in elements:
                    sub_blocks, sub_body = self._parse_body(content,
                                                            argument_names)
                    if len(sub_blocks) > 1:
                        raise RuntimeError("Imbricated if unimplemented")

                    ## Close the last block
                    jmp_end = _build_call('ExprAssign', [IRDst, loc_end])
                    sub_blocks[-1][-1].append(jmp_end)

                    instr = ast.Name(id='instr', ctx=ast.Load())
                    effects = ast.List(elts=sub_blocks[-1][-1],
                                       ctx=ast.Load())
                    assignblk = _build_call('AssignBlock', [effects, instr])

                    ## Replace the block with a call to 'IRBlock'
                    loc_if_name = ast.Name(id=loc_name, ctx=ast.Load())

                    assignblks = ast.List(elts=[assignblk],
                                          ctx=ast.Load())

                    sub_blocks[-1] = _build_call('IRBlock',
                                                 [loc_db,
                                                  loc_if_name,
                                                  assignblks])
                    blocks += sub_blocks
                    real_body += sub_body

                # Prepare a new block for following statement
                blocks.append([[]])

            else:
                # TODO: real var, +=, /=, -=, <<=, >>=, if/else, ...
                raise RuntimeError("Unimplemented %s" % statement)

        return blocks, real_body

    def parse(self, func):
        """Function decorator, returning a correct method from a pseudo-Python
        one
        @func: function written with the DSL; its source must be retrievable
        through inspect.getsource

        The returned function takes 'ir' and 'instr' in front of the arguments
        of @func and returns a tuple (current instruction effects, other
        blocks). It is also registered in `functions` under the name of @func.
        """

        # Get the function AST
        # The source is dedented first so that functions defined in an
        # indented scope can be parsed too (no-op for top-level functions)
        parsed = ast.parse(textwrap.dedent(inspect.getsource(func)))
        fc_ast = parsed.body[0]
        argument_names = [get_arg_name(name) for name in fc_ast.args.args]

        # Init local cache
        self._local_ctx = {}

        # Translate (blocks[0][0] is the current instr)
        blocks, body = self._parse_body(fc_ast.body, argument_names)

        # Build the new function
        fc_ast.args.args[0:0] = [gen_arg('ir'), gen_arg('instr')]
        cur_instr = blocks[0][0]
        if not blocks[-1][0]:
            ## Last block can be empty
            blocks.pop()
        other_blocks = blocks[1:]
        result = ast.Tuple(elts=[ast.List(elts=cur_instr,
                                          ctx=ast.Load()),
                                 ast.List(elts=other_blocks,
                                          ctx=ast.Load())],
                           ctx=ast.Load())
        body.append(ast.Return(value=result))

        ret = ast.parse('')
        ret.body = [ast.FunctionDef(name=fc_ast.name,
                                    args=fc_ast.args,
                                    body=body,
                                    decorator_list=[])]

        # To display the generated function, use codegen.to_source
        # codegen: https://github.com/andreif/codegen

        # Compile according to the context
        fixed = ast.fix_missing_locations(ret)
        codeobj = compile(fixed, '<string>', 'exec')
        ctx = self._ctx.copy()
        # Running the 'exec' code object defines the function in ctx
        eval(codeobj, ctx)

        # Get the function back
        self._functions[fc_ast.name] = ctx[fc_ast.name]
        return ctx[fc_ast.name]