diff options
| author | Camille Mougey <commial@gmail.com> | 2017-02-13 16:24:19 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-02-13 16:24:19 +0100 |
| commit | 90e170f45e342d03875e95b86afb038cb25ad2c1 (patch) | |
| tree | f26149c559b32c700a8d7fb7c2a74693229d9f58 | |
| parent | 827c6cb8e1cdcc6e501c319353f89615b9cc09c9 (diff) | |
| parent | a7c9a7f769094d0af3b7a98bdb7319bcb3921e11 (diff) | |
| download | miasm-90e170f45e342d03875e95b86afb038cb25ad2c1.tar.gz miasm-90e170f45e342d03875e95b86afb038cb25ad2c1.zip | |
Merge pull request #493 from serpilliere/fix_asm_parsing
Fix asm parsing
| -rw-r--r-- | example/expression/solve_condition_stp.py | 2 | ||||
| -rw-r--r-- | miasm2/arch/aarch64/arch.py | 31 | ||||
| -rw-r--r-- | miasm2/arch/arm/arch.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/mips32/arch.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/msp430/arch.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/sh4/arch.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/x86/arch.py | 8 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 191 | ||||
| -rw-r--r-- | miasm2/core/parse_asm.py | 8 | ||||
| -rw-r--r-- | test/arch/aarch64/unit/asm_test.py | 2 | ||||
| -rw-r--r-- | test/arch/mips32/unit/asm_test.py | 2 | ||||
| -rw-r--r-- | test/arch/x86/arch.py | 4 | ||||
| -rw-r--r-- | test/arch/x86/unit/asm_test.py | 2 |
13 files changed, 151 insertions, 131 deletions
diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 67d536d5..2a3b42fd 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -143,7 +143,7 @@ if __name__ == '__main__': else: return ExprId(string_parsed, size=32) - my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) + my_var_parser = ParseAst(my_ast_id2expr, my_ast_int2expr) base_expr.setParseAction(my_var_parser) argc = ExprId('argc', 32) diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index c875d787..460c134e 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -74,34 +74,29 @@ _, _, base_expr64 = gen_base_expr() def ast_id2expr32(t): - if not t in mn_aarch64.regs.all_regs_ids_byname: - r = m2_expr.ExprId(asm_label(t)) - else: - r = mn_aarch64.regs.all_regs_ids_byname[t] - if not r.size == 32: - raise StopIteration - return r - + if t in mn_aarch64.regs.all_regs_ids_byname: + t = mn_aarch64.regs.all_regs_ids_byname[t] + if not t.size == 32: + raise StopIteration + return t def ast_int2expr32(a): return m2_expr.ExprInt32(a) def ast_id2expr64(t): - if not t in mn_aarch64.regs.all_regs_ids_byname: - r = m2_expr.ExprId(asm_label(t)) - else: - r = mn_aarch64.regs.all_regs_ids_byname[t] - if not r.size == 64: - raise StopIteration - return r + if t in mn_aarch64.regs.all_regs_ids_byname: + t = mn_aarch64.regs.all_regs_ids_byname[t] + if not t.size == 64: + raise StopIteration + return t def ast_int2expr64(a): return m2_expr.ExprInt64(a) -my_var_parser32 = parse_ast(ast_id2expr32, ast_int2expr32) -my_var_parser64 = parse_ast(ast_id2expr64, ast_int2expr64) +my_var_parser32 = ParseAst(ast_id2expr32, ast_int2expr32, default_size=32) +my_var_parser64 = ParseAst(ast_id2expr64, ast_int2expr64, default_size=64) base_expr32.setParseAction(my_var_parser32) base_expr64.setParseAction(my_var_parser64) @@ -234,7 +229,7 @@ simds_info = {8: simd08_info, 128: simd128_info} -my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 0e58008d..d9bf42ba 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -181,18 +181,14 @@ int_or_expr = base_expr def ast_id2expr(t): - if not t in mn_arm.regs.all_regs_ids_byname: - r = ExprId(asm_label(t)) - else: - r = mn_arm.regs.all_regs_ids_byname[t] - return r + return mn_arm.regs.all_regs_ids_byname.get(t, t) def ast_int2expr(a): return ExprInt32(a) -my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 2ac16770..79176205 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -53,18 +53,14 @@ int_or_expr = base_expr def ast_id2expr(t): - if not t in mn_mips32.regs.all_regs_ids_byname: - r = ExprId(asm_label(t)) - else: - r = mn_mips32.regs.all_regs_ids_byname[t] - return r + return mn_mips32.regs.all_regs_ids_byname.get(t, t) def ast_int2expr(a): return ExprInt32(a) -my_var_parser = cpu.parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = cpu.ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) class additional_info: diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index a9f695ec..d7463f3d 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -72,11 +72,7 @@ PINC = Suppress("+") def ast_id2expr(t): - if not t in mn_msp430.regs.all_regs_ids_byname: - r = ExprId(asm_label(t), 16) - else: - r = mn_msp430.regs.all_regs_ids_byname[t] - return r + return mn_msp430.regs.all_regs_ids_byname.get(t, t) def ast_int2expr(a): @@ -85,7 +81,7 @@ def ast_int2expr(a): variable, operand, base_expr = gen_base_expr() -my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index d72e6945..ae96fef1 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -36,17 +36,13 @@ def parse_pcandimmimm(t): return (t[0] & t[1]) + t[2] def ast_id2expr(t): - if not t in mn_sh4.regs.all_regs_ids_byname: - r = ExprId(asm_label(t)) - else: - r = mn_sh4.regs.all_regs_ids_byname[t] - return r + return mn_sh4.regs.all_regs_ids_byname.get(t, t) def ast_int2expr(a): return ExprInt32(a) -my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) int_or_expr = base_expr diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 20fdc1cf..8ae6cd31 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -223,18 +223,14 @@ variable, operand, base_expr = gen_base_expr() def ast_id2expr(t): - if not t in mn_x86.regs.all_regs_ids_byname: - r = ExprId(asm_label(t)) - else: - r = mn_x86.regs.all_regs_ids_byname[t] - return r + return mn_x86.regs.all_regs_ids_byname.get(t, t) def ast_int2expr(a): return ExprInt64(a) -my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) int_or_expr = base_expr diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 22f4c8ab..1beeeff0 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -199,84 +199,123 @@ def ast_int2expr(a): return m2_expr.ExprInt32(a) -def ast_raw2expr(a, my_id2expr, my_int2expr): - assert(isinstance(a, tuple)) - if a[0] is m2_expr.ExprId: - e = my_id2expr(a[1]) - elif a[0] is m2_expr.ExprInt: - e = my_int2expr(a[1]) - elif a[0] is m2_expr.ExprOp: - out = [] - for x in a[1]: - if isinstance(x, tuple): - x = ast_raw2expr(x, my_id2expr, my_int2expr) - out.append(x) - e = ast_parse_op(out) - else: - raise TypeError('unknown type') - return e - - -def ast_get_ids(a): - assert(isinstance(a, tuple)) - if a[0] is m2_expr.ExprId: - return set([a[1]]) - elif a[0] is m2_expr.ExprInt: - return set() - elif a[0] is m2_expr.ExprOp: - out = set() - for x in a[1]: - if isinstance(x, tuple): - out.update(ast_get_ids(x)) - return out - raise TypeError('unknown type') - - -def _extract_ast_core(a): - assert(isinstance(a, tuple)) - if a[0] in [m2_expr.ExprInt, m2_expr.ExprId]: - return a - elif a[0] is m2_expr.ExprOp: - out = [] - for x in a[1]: - if isinstance(x, tuple): - x = _extract_ast_core(x) - out.append(x) - return tuple([a[0]] + [out]) - else: - raise TypeError('unknown type') +class ParseAst(object): -def extract_ast_core(v, my_id2expr, my_int2expr): - ast_tokens = _extract_ast_core(v) - ids = ast_get_ids(ast_tokens) - ids_expr = [my_id2expr(x) for x in ids] - sizes = set([i.size for i in ids_expr]) - - if len(sizes) == 0: - pass - elif len(sizes) == 1: - size = sizes.pop() - my_int2expr = lambda x: m2_expr.ExprInt(x, size) - else: - # Multiple sizes in ids - raise StopIteration - e = ast_raw2expr(ast_tokens, my_id2expr, my_int2expr) - return e + def __init__(self, id2expr, int2expr, default_size=32): + self.id2expr = id2expr + self.int2expr = int2expr + self.default_size = default_size + + def int_from_size(self, size, value): + """Transform a string into ExprInt. + * if @size is None, use provided int2expr + * else, use @size to generate integer + @size: size of int; None if not forced. + @value: string representing an integer + """ + if size is None: + return self.int2expr(value) + else: + return m2_expr.ExprInt(value, size) + + def id_from_size(self, size, value): + """Transform a string into ExprId. + * if @size is None, use provided id2expr + * else, use @size to generate id + @size: size of id; None if not forced. + @value: string representing the id + """ + value = self.id2expr(value) + if isinstance(value, m2_expr.Expr): + return value + if size is None: + size = self.default_size + assert value is not None + return m2_expr.ExprId(asmbloc.asm_label(value), size) + + def ast_to_expr(self, size, ast): + """Transform a typed ast into a Miasm expression + @size: default size + @ast: typed ast + """ + assert(isinstance(ast, tuple)) + if ast[0] is m2_expr.ExprId: + expr = self.id_from_size(size, ast[1]) + if isinstance(expr, str): + expr = self.id_from_size(size, expr) + elif ast[0] is m2_expr.ExprInt: + expr = self.int_from_size(size, ast[1]) + elif ast[0] is m2_expr.ExprOp: + out = [] + for arg in ast[1]: + if isinstance(arg, tuple): + arg = self.ast_to_expr(size, arg) + out.append(arg) + expr = ast_parse_op(out) + else: + raise TypeError('unknown type') + return expr + def ast_get_ids(self, ast): + """Retrieve every node of type ExprId in @ast + @ast: typed ast + """ + assert(isinstance(ast, tuple)) + if ast[0] is m2_expr.ExprId: + return set([ast[1]]) + elif ast[0] is m2_expr.ExprInt: + return set() + elif ast[0] is m2_expr.ExprOp: + out = set() + for x in ast[1]: + if isinstance(x, tuple): + out.update(self.ast_get_ids(x)) + return out + raise TypeError('unknown type') -class parse_ast: + def _extract_ast_core(self, ast): + assert(isinstance(ast, tuple)) + if ast[0] in [m2_expr.ExprInt, m2_expr.ExprId]: + return ast + elif ast[0] is m2_expr.ExprOp: + out = [] + for arg in ast[1]: + if isinstance(arg, tuple): + arg = self._extract_ast_core(arg) + out.append(arg) + return tuple([ast[0]] + [out]) + else: + raise TypeError('unknown type') - def __init__(self, id2expr, int2expr, extract_ast=extract_ast_core): - self.id2expr = id2expr - self.int2expr = int2expr - self.extract_ast_core = extract_ast + def extract_ast_core(self, ast): + """ + Trasform an @ast into a Miasm expression. + Use registers size to deduce label and integers sizes. + """ + ast = self._extract_ast_core(ast) + ids = self.ast_get_ids(ast) + ids_expr = [self.id2expr(x) for x in ids] + sizes = set([expr.size for expr in ids_expr + if isinstance(expr, m2_expr.Expr)]) + if not sizes: + size = None + elif len(sizes) == 1: + size = sizes.pop() + else: + # Multiple sizes in ids + raise StopIteration + return self.ast_to_expr(size, ast) - def __call__(self, v): - v = v[0] - if isinstance(v, m2_expr.Expr): - return v - return self.extract_ast_core(v, self.id2expr, self.int2expr) + def __call__(self, ast): + """ + Trasform an @ast into a Miasm expression. + Use registers size to deduce label and integers sizes. + """ + ast = ast[0] + if isinstance(ast, m2_expr.Expr): + return ast + return self.extract_ast_core(ast) def neg_int(t): @@ -325,7 +364,7 @@ def gen_base_expr(): variable, operand, base_expr = gen_base_expr() -my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) default_prio = 0x1337 @@ -922,6 +961,9 @@ class instruction(object): def get_asm_offset(self, expr): return m2_expr.ExprInt(self.offset, expr.size) + def get_asm_next_offset(self, expr): + return m2_expr.ExprInt(self.offset+self.l, expr.size) + def resolve_args_with_symbols(self, symbols=None): if symbols is None: symbols = {} @@ -938,6 +980,9 @@ class instruction(object): if name == '$': fixed_ids[x] = self.get_asm_offset(x) continue + if name == '_': + fixed_ids[x] = self.get_asm_next_offset(x) + continue if not name in symbols: raise ValueError('unresolved symbol! %r' % x) else: diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 238306b3..11fa4040 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -3,7 +3,7 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmbloc as asmbloc -from miasm2.core.cpu import gen_base_expr, parse_ast +from miasm2.core.cpu import gen_base_expr, ParseAst from miasm2.core.cpu import instruction declarator = {'byte': 8, @@ -169,9 +169,9 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): # parser base_expr = gen_base_expr()[2] - my_var_parser = parse_ast(lambda x: m2_expr.ExprId(x, size), - lambda x: - m2_expr.ExprInt(x, size)) + my_var_parser = ParseAst(lambda x: m2_expr.ExprId(x, size), + lambda x: + m2_expr.ExprInt(x, size)) base_expr.setParseAction(my_var_parser) for element in data_raw: diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index cfb2a81c..54ab476d 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -1,7 +1,7 @@ import sys import os -from miasm2.core.cpu import parse_ast +from miasm2.core.cpu import ParseAst from miasm2.arch.aarch64.arch import mn_aarch64, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index 35d87d85..4425bb65 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -1,7 +1,7 @@ import sys import os -from miasm2.core.cpu import parse_ast +from miasm2.core.cpu import ParseAst from miasm2.arch.mips32.arch import mn_mips32, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index b0ea7cb4..83039368 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -1,7 +1,7 @@ import os import time import miasm2.expression.expression as m2_expr -from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, parse_ast, ast_int2expr, \ +from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, ParseAst, ast_int2expr, \ base_expr, rmarg, print_size from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.core.bin_stream import bin_stream_str @@ -34,7 +34,7 @@ def my_ast_id2expr(t): r = reg_and_id.get(t, m2_expr.ExprId(t, size=32)) return r -my_var_parser = parse_ast(my_ast_id2expr, ast_int2expr) +my_var_parser = ParseAst(my_ast_id2expr, ast_int2expr) base_expr.setParseAction(my_var_parser) for s in ['EAX', diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index bf97dbe4..524791ce 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -1,7 +1,7 @@ import sys import os -from miasm2.core.cpu import parse_ast +from miasm2.core.cpu import ParseAst from miasm2.arch.x86.arch import mn_x86, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * |