about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Camille Mougey <commial@gmail.com> 2017-02-13 16:24:19 +0100
committer GitHub <noreply@github.com> 2017-02-13 16:24:19 +0100
commit 90e170f45e342d03875e95b86afb038cb25ad2c1 (patch)
tree f26149c559b32c700a8d7fb7c2a74693229d9f58
parent 827c6cb8e1cdcc6e501c319353f89615b9cc09c9 (diff)
parent a7c9a7f769094d0af3b7a98bdb7319bcb3921e11 (diff)
download miasm-90e170f45e342d03875e95b86afb038cb25ad2c1.tar.gz
miasm-90e170f45e342d03875e95b86afb038cb25ad2c1.zip
Merge pull request #493 from serpilliere/fix_asm_parsing
Fix asm parsing
-rw-r--r-- example/expression/solve_condition_stp.py 2
-rw-r--r-- miasm2/arch/aarch64/arch.py 31
-rw-r--r-- miasm2/arch/arm/arch.py 8
-rw-r--r-- miasm2/arch/mips32/arch.py 8
-rw-r--r-- miasm2/arch/msp430/arch.py 8
-rw-r--r-- miasm2/arch/sh4/arch.py 8
-rw-r--r-- miasm2/arch/x86/arch.py 8
-rw-r--r-- miasm2/core/cpu.py 191
-rw-r--r-- miasm2/core/parse_asm.py 8
-rw-r--r-- test/arch/aarch64/unit/asm_test.py 2
-rw-r--r-- test/arch/mips32/unit/asm_test.py 2
-rw-r--r-- test/arch/x86/arch.py 4
-rw-r--r-- test/arch/x86/unit/asm_test.py 2
13 files changed, 151 insertions, 131 deletions
diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py
index 67d536d5..2a3b42fd 100644
--- a/example/expression/solve_condition_stp.py
+++ b/example/expression/solve_condition_stp.py
@@ -143,7 +143,7 @@ if __name__ == '__main__':
         else:
             return ExprId(string_parsed, size=32)
 
-    my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr)
+    my_var_parser = ParseAst(my_ast_id2expr, my_ast_int2expr)
     base_expr.setParseAction(my_var_parser)
 
     argc = ExprId('argc', 32)
diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py
index c875d787..460c134e 100644
--- a/miasm2/arch/aarch64/arch.py
+++ b/miasm2/arch/aarch64/arch.py
@@ -74,34 +74,29 @@ _, _, base_expr64 = gen_base_expr()
 
 
 def ast_id2expr32(t):
-    if not t in mn_aarch64.regs.all_regs_ids_byname:
-        r = m2_expr.ExprId(asm_label(t))
-    else:
-        r = mn_aarch64.regs.all_regs_ids_byname[t]
-    if not r.size == 32:
-        raise StopIteration
-    return r
-
+    if t in mn_aarch64.regs.all_regs_ids_byname:
+        t = mn_aarch64.regs.all_regs_ids_byname[t]
+        if not t.size == 32:
+            raise StopIteration
+    return t
 
 def ast_int2expr32(a):
     return m2_expr.ExprInt32(a)
 
 
 def ast_id2expr64(t):
-    if not t in mn_aarch64.regs.all_regs_ids_byname:
-        r = m2_expr.ExprId(asm_label(t))
-    else:
-        r = mn_aarch64.regs.all_regs_ids_byname[t]
-    if not r.size == 64:
-        raise StopIteration
-    return r
+    if t in mn_aarch64.regs.all_regs_ids_byname:
+        t = mn_aarch64.regs.all_regs_ids_byname[t]
+        if not t.size == 64:
+            raise StopIteration
+    return t
 
 
 def ast_int2expr64(a):
     return m2_expr.ExprInt64(a)
 
-my_var_parser32 = parse_ast(ast_id2expr32, ast_int2expr32)
-my_var_parser64 = parse_ast(ast_id2expr64, ast_int2expr64)
+my_var_parser32 = ParseAst(ast_id2expr32, ast_int2expr32, default_size=32)
+my_var_parser64 = ParseAst(ast_id2expr64, ast_int2expr64, default_size=64)
 
 base_expr32.setParseAction(my_var_parser32)
 base_expr64.setParseAction(my_var_parser64)
@@ -234,7 +229,7 @@ simds_info = {8: simd08_info,
               128: simd128_info}
 
 
-my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 
diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py
index 0e58008d..d9bf42ba 100644
--- a/miasm2/arch/arm/arch.py
+++ b/miasm2/arch/arm/arch.py
@@ -181,18 +181,14 @@ int_or_expr = base_expr
 
 
 def ast_id2expr(t):
-    if not t in mn_arm.regs.all_regs_ids_byname:
-        r = ExprId(asm_label(t))
-    else:
-        r = mn_arm.regs.all_regs_ids_byname[t]
-    return r
+    return mn_arm.regs.all_regs_ids_byname.get(t, t)
 
 
 def ast_int2expr(a):
     return ExprInt32(a)
 
 
-my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 
diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py
index 2ac16770..79176205 100644
--- a/miasm2/arch/mips32/arch.py
+++ b/miasm2/arch/mips32/arch.py
@@ -53,18 +53,14 @@ int_or_expr = base_expr
 
 
 def ast_id2expr(t):
-    if not t in mn_mips32.regs.all_regs_ids_byname:
-        r = ExprId(asm_label(t))
-    else:
-        r = mn_mips32.regs.all_regs_ids_byname[t]
-    return r
+    return mn_mips32.regs.all_regs_ids_byname.get(t, t)
 
 
 def ast_int2expr(a):
     return ExprInt32(a)
 
 
-my_var_parser = cpu.parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = cpu.ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 class additional_info:
diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py
index a9f695ec..d7463f3d 100644
--- a/miasm2/arch/msp430/arch.py
+++ b/miasm2/arch/msp430/arch.py
@@ -72,11 +72,7 @@ PINC = Suppress("+")
 
 
 def ast_id2expr(t):
-    if not t in mn_msp430.regs.all_regs_ids_byname:
-        r = ExprId(asm_label(t), 16)
-    else:
-        r = mn_msp430.regs.all_regs_ids_byname[t]
-    return r
+    return mn_msp430.regs.all_regs_ids_byname.get(t, t)
 
 
 def ast_int2expr(a):
@@ -85,7 +81,7 @@ def ast_int2expr(a):
 
 variable, operand, base_expr = gen_base_expr()
 
-my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 
diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py
index d72e6945..ae96fef1 100644
--- a/miasm2/arch/sh4/arch.py
+++ b/miasm2/arch/sh4/arch.py
@@ -36,17 +36,13 @@ def parse_pcandimmimm(t):
     return (t[0] & t[1]) + t[2]
 
 def ast_id2expr(t):
-    if not t in mn_sh4.regs.all_regs_ids_byname:
-        r = ExprId(asm_label(t))
-    else:
-        r = mn_sh4.regs.all_regs_ids_byname[t]
-    return r
+    return mn_sh4.regs.all_regs_ids_byname.get(t, t)
 
 def ast_int2expr(a):
     return ExprInt32(a)
 
 
-my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 int_or_expr = base_expr
diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 20fdc1cf..8ae6cd31 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -223,18 +223,14 @@ variable, operand, base_expr = gen_base_expr()
 
 
 def ast_id2expr(t):
-    if not t in mn_x86.regs.all_regs_ids_byname:
-        r = ExprId(asm_label(t))
-    else:
-        r = mn_x86.regs.all_regs_ids_byname[t]
-    return r
+    return mn_x86.regs.all_regs_ids_byname.get(t, t)
 
 
 def ast_int2expr(a):
     return ExprInt64(a)
 
 
-my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 int_or_expr = base_expr
diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py
index 22f4c8ab..1beeeff0 100644
--- a/miasm2/core/cpu.py
+++ b/miasm2/core/cpu.py
@@ -199,84 +199,123 @@ def ast_int2expr(a):
     return m2_expr.ExprInt32(a)
 
 
-def ast_raw2expr(a, my_id2expr, my_int2expr):
-    assert(isinstance(a, tuple))
-    if a[0] is m2_expr.ExprId:
-        e = my_id2expr(a[1])
-    elif a[0] is m2_expr.ExprInt:
-        e = my_int2expr(a[1])
-    elif a[0] is m2_expr.ExprOp:
-        out = []
-        for x in a[1]:
-            if isinstance(x, tuple):
-                x = ast_raw2expr(x, my_id2expr, my_int2expr)
-            out.append(x)
-        e = ast_parse_op(out)
-    else:
-        raise TypeError('unknown type')
-    return e
-
-
-def ast_get_ids(a):
-    assert(isinstance(a, tuple))
-    if a[0] is m2_expr.ExprId:
-        return set([a[1]])
-    elif a[0] is m2_expr.ExprInt:
-        return set()
-    elif a[0] is m2_expr.ExprOp:
-        out = set()
-        for x in a[1]:
-            if isinstance(x, tuple):
-                out.update(ast_get_ids(x))
-        return out
-    raise TypeError('unknown type')
-
-
-def _extract_ast_core(a):
-    assert(isinstance(a, tuple))
-    if a[0] in [m2_expr.ExprInt, m2_expr.ExprId]:
-        return a
-    elif a[0] is m2_expr.ExprOp:
-        out = []
-        for x in a[1]:
-            if isinstance(x, tuple):
-                x = _extract_ast_core(x)
-            out.append(x)
-        return tuple([a[0]] + [out])
-    else:
-        raise TypeError('unknown type')
 
+class ParseAst(object):
 
-def extract_ast_core(v, my_id2expr, my_int2expr):
-    ast_tokens = _extract_ast_core(v)
-    ids = ast_get_ids(ast_tokens)
-    ids_expr = [my_id2expr(x) for x in ids]
-    sizes = set([i.size for i in ids_expr])
-
-    if len(sizes) == 0:
-        pass
-    elif len(sizes) == 1:
-        size = sizes.pop()
-        my_int2expr = lambda x: m2_expr.ExprInt(x, size)
-    else:
-        # Multiple sizes in ids
-        raise StopIteration
-    e = ast_raw2expr(ast_tokens, my_id2expr, my_int2expr)
-    return e
+    def __init__(self, id2expr, int2expr, default_size=32):
+        self.id2expr = id2expr
+        self.int2expr = int2expr
+        self.default_size = default_size
+
+    def int_from_size(self, size, value):
+        """Transform a string into ExprInt.
+        * if @size is None, use provided int2expr
+        * else, use @size to generate integer
+        @size: size of int; None if not forced.
+        @value: string representing an integer
+        """
+        if size is None:
+            return self.int2expr(value)
+        else:
+            return m2_expr.ExprInt(value, size)
+
+    def id_from_size(self, size, value):
+        """Transform a string into ExprId.
+        * if @size is None, use provided id2expr
+        * else, use @size to generate id
+        @size: size of id; None if not forced.
+        @value: string representing the id
+        """
+        value = self.id2expr(value)
+        if isinstance(value, m2_expr.Expr):
+            return value
+        if size is None:
+            size = self.default_size
+        assert value is not None
+        return m2_expr.ExprId(asmbloc.asm_label(value), size)
+
+    def ast_to_expr(self, size, ast):
+        """Transform a typed ast into a Miasm expression
+        @size: default size
+        @ast: typed ast
+        """
+        assert(isinstance(ast, tuple))
+        if ast[0] is m2_expr.ExprId:
+            expr = self.id_from_size(size, ast[1])
+            if isinstance(expr, str):
+                expr = self.id_from_size(size, expr)
+        elif ast[0] is m2_expr.ExprInt:
+            expr = self.int_from_size(size, ast[1])
+        elif ast[0] is m2_expr.ExprOp:
+            out = []
+            for arg in ast[1]:
+                if isinstance(arg, tuple):
+                    arg = self.ast_to_expr(size, arg)
+                out.append(arg)
+            expr = ast_parse_op(out)
+        else:
+            raise TypeError('unknown type')
+        return expr
 
+    def ast_get_ids(self, ast):
+        """Retrieve every node of type ExprId in @ast
+        @ast: typed ast
+        """
+        assert(isinstance(ast, tuple))
+        if ast[0] is m2_expr.ExprId:
+            return set([ast[1]])
+        elif ast[0] is m2_expr.ExprInt:
+            return set()
+        elif ast[0] is m2_expr.ExprOp:
+            out = set()
+            for x in ast[1]:
+                if isinstance(x, tuple):
+                    out.update(self.ast_get_ids(x))
+            return out
+        raise TypeError('unknown type')
 
-class parse_ast:
+    def _extract_ast_core(self, ast):
+        assert(isinstance(ast, tuple))
+        if ast[0] in [m2_expr.ExprInt, m2_expr.ExprId]:
+            return ast
+        elif ast[0] is m2_expr.ExprOp:
+            out = []
+            for arg in ast[1]:
+                if isinstance(arg, tuple):
+                    arg = self._extract_ast_core(arg)
+                out.append(arg)
+            return tuple([ast[0]] + [out])
+        else:
+            raise TypeError('unknown type')
 
-    def __init__(self, id2expr, int2expr, extract_ast=extract_ast_core):
-        self.id2expr = id2expr
-        self.int2expr = int2expr
-        self.extract_ast_core = extract_ast
+    def extract_ast_core(self, ast):
+        """
+        Transform an @ast into a Miasm expression.
+        Use registers size to deduce label and integers sizes.
+        """
+        ast = self._extract_ast_core(ast)
+        ids = self.ast_get_ids(ast)
+        ids_expr = [self.id2expr(x) for x in ids]
+        sizes = set([expr.size for expr in ids_expr
+                     if isinstance(expr, m2_expr.Expr)])
+        if not sizes:
+            size = None
+        elif len(sizes) == 1:
+            size = sizes.pop()
+        else:
+            # Multiple sizes in ids
+            raise StopIteration
+        return self.ast_to_expr(size, ast)
 
-    def __call__(self, v):
-        v = v[0]
-        if isinstance(v, m2_expr.Expr):
-            return v
-        return self.extract_ast_core(v, self.id2expr, self.int2expr)
+    def __call__(self, ast):
+        """
+        Transform an @ast into a Miasm expression.
+        Use registers size to deduce label and integers sizes.
+        """
+        ast = ast[0]
+        if isinstance(ast, m2_expr.Expr):
+            return ast
+        return self.extract_ast_core(ast)
 
 
 def neg_int(t):
@@ -325,7 +364,7 @@ def gen_base_expr():
 
 variable, operand, base_expr = gen_base_expr()
 
-my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 default_prio = 0x1337
@@ -922,6 +961,9 @@ class instruction(object):
     def get_asm_offset(self, expr):
         return m2_expr.ExprInt(self.offset, expr.size)
 
+    def get_asm_next_offset(self, expr):
+        return m2_expr.ExprInt(self.offset+self.l, expr.size)
+
     def resolve_args_with_symbols(self, symbols=None):
         if symbols is None:
             symbols = {}
@@ -938,6 +980,9 @@ class instruction(object):
                     if name == '$':
                         fixed_ids[x] = self.get_asm_offset(x)
                         continue
+                    if name == '_':
+                        fixed_ids[x] = self.get_asm_next_offset(x)
+                        continue
                     if not name in symbols:
                         raise ValueError('unresolved symbol! %r' % x)
                 else:
diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py
index 238306b3..11fa4040 100644
--- a/miasm2/core/parse_asm.py
+++ b/miasm2/core/parse_asm.py
@@ -3,7 +3,7 @@ import re
 
 import miasm2.expression.expression as m2_expr
 import miasm2.core.asmbloc as asmbloc
-from miasm2.core.cpu import gen_base_expr, parse_ast
+from miasm2.core.cpu import gen_base_expr, ParseAst
 from miasm2.core.cpu import instruction
 
 declarator = {'byte': 8,
@@ -169,9 +169,9 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None):
 
                 # parser
                 base_expr = gen_base_expr()[2]
-                my_var_parser = parse_ast(lambda x: m2_expr.ExprId(x, size),
-                                          lambda x:
-                                              m2_expr.ExprInt(x, size))
+                my_var_parser = ParseAst(lambda x: m2_expr.ExprId(x, size),
+                                         lambda x:
+                                         m2_expr.ExprInt(x, size))
                 base_expr.setParseAction(my_var_parser)
 
                 for element in data_raw:
diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py
index cfb2a81c..54ab476d 100644
--- a/test/arch/aarch64/unit/asm_test.py
+++ b/test/arch/aarch64/unit/asm_test.py
@@ -1,7 +1,7 @@
 import sys
 import os
 
-from miasm2.core.cpu import parse_ast
+from miasm2.core.cpu import ParseAst
 from miasm2.arch.aarch64.arch import mn_aarch64, base_expr, variable
 from miasm2.core import parse_asm
 from miasm2.expression.expression import *
diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py
index 35d87d85..4425bb65 100644
--- a/test/arch/mips32/unit/asm_test.py
+++ b/test/arch/mips32/unit/asm_test.py
@@ -1,7 +1,7 @@
 import sys
 import os
 
-from miasm2.core.cpu import parse_ast
+from miasm2.core.cpu import ParseAst
 from miasm2.arch.mips32.arch import mn_mips32, base_expr, variable
 from miasm2.core import parse_asm
 from miasm2.expression.expression import *
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index b0ea7cb4..83039368 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -1,7 +1,7 @@
 import os
 import time
 import miasm2.expression.expression as m2_expr
-from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, parse_ast, ast_int2expr, \
+from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, ParseAst, ast_int2expr, \
     base_expr, rmarg, print_size
 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64
 from miasm2.core.bin_stream import bin_stream_str
@@ -34,7 +34,7 @@ def my_ast_id2expr(t):
     r = reg_and_id.get(t, m2_expr.ExprId(t, size=32))
     return r
 
-my_var_parser = parse_ast(my_ast_id2expr, ast_int2expr)
+my_var_parser = ParseAst(my_ast_id2expr, ast_int2expr)
 base_expr.setParseAction(my_var_parser)
 
 for s in ['EAX',
diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py
index bf97dbe4..524791ce 100644
--- a/test/arch/x86/unit/asm_test.py
+++ b/test/arch/x86/unit/asm_test.py
@@ -1,7 +1,7 @@
 import sys
 import os
 
-from miasm2.core.cpu import parse_ast
+from miasm2.core.cpu import ParseAst
 from miasm2.arch.x86.arch import mn_x86, base_expr, variable
 from miasm2.core import parse_asm
 from miasm2.expression.expression import *