diff options
Diffstat (limited to 'miasm2/core')
| -rw-r--r-- | miasm2/core/asm_ast.py | 94 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 384 | ||||
| -rw-r--r-- | miasm2/core/parse_asm.py | 26 |
3 files changed, 316 insertions, 188 deletions
diff --git a/miasm2/core/asm_ast.py b/miasm2/core/asm_ast.py new file mode 100644 index 00000000..7a365ba1 --- /dev/null +++ b/miasm2/core/asm_ast.py @@ -0,0 +1,94 @@ +class AstNode(object): + """ + Ast node object + """ + def __neg__(self): + if isinstance(self, AstInt): + value = AstInt(-self.value) + else: + value = AstOp('-', self) + return value + + def __add__(self, other): + return AstOp('+', self, other) + + def __sub__(self, other): + return AstOp('-', self, other) + + def __div__(self, other): + return AstOp('/', self, other) + + def __mod__(self, other): + return AstOp('%', self, other) + + def __mul__(self, other): + return AstOp('*', self, other) + + def __lshift__(self, other): + return AstOp('<<', self, other) + + def __rshift__(self, other): + return AstOp('>>', self, other) + + def __xor__(self, other): + return AstOp('^', self, other) + + def __or__(self, other): + return AstOp('|', self, other) + + def __and__(self, other): + return AstOp('&', self, other) + + def __neg__(self): + return AstOp('-', self) + + +class AstInt(AstNode): + """ + Ast integer + """ + def __init__(self, value): + self.value = value + + def __str__(self): + return "%s" % self.value + + +class AstId(AstNode): + """ + Ast Id + """ + def __init__(self, name): + self.name = name + + def __str__(self): + return "%s" % self.name + + +class AstMem(AstNode): + """ + Ast memory deref + """ + def __init__(self, ptr, size): + assert isinstance(ptr, AstNode) + assert isinstance(size, (int, long)) + self.ptr = ptr + self.size = size + + def __str__(self): + return "@%d[%s]" % (self.size, self.ptr) + + +class AstOp(AstNode): + """ + Ast operator + """ + def __init__(self, op, *args): + assert all(isinstance(arg, AstNode) for arg in args) + self.op = op + self.args = args + + def __str__(self): + if len(self.args) == 1: + return "(%s %s)" % (self.op, self.args[0]) + return '(' + ("%s" % self.op).join(str(x) for x in self.args) + ')' diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 061752f8..ca419458 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -13,6 +13,9 @@ from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp + +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp + log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -85,12 +88,19 @@ def literal_list(l): return o -class reg_info: +class reg_info(object): def __init__(self, reg_str, reg_expr): self.str = reg_str self.expr = reg_expr - self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + self.parser = literal_list(reg_str).setParseAction(self.cb_parse) + + def cb_parse(self, t): + assert len(t) == 1 + i = self.str.index(t[0]) + reg = self.expr[i] + result = AstId(reg) + return result def reg2expr(self, s): i = self.str.index(s[0]) @@ -100,15 +110,21 @@ class reg_info: return self.expr.index(e) - -class reg_info_dct: +class reg_info_dct(object): def __init__(self, reg_expr): self.dct_str_inv = dict((v.name, k) for k, v in reg_expr.iteritems()) self.dct_expr = reg_expr self.dct_expr_inv = dict((v, k) for k, v in reg_expr.iteritems()) reg_str = [v.name for v in reg_expr.itervalues()] - self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + self.parser = literal_list(reg_str).setParseAction(self.cb_parse) + + def cb_parse(self, t): + assert len(t) == 1 + i = self.dct_str_inv[t[0]] + reg = self.dct_expr[i] + result = AstId(reg) + return result def reg2expr(self, s): i = self.dct_str_inv[s[0]] @@ -118,34 +134,30 @@ class reg_info_dct: return self.dct_expr_inv[e] -def gen_reg(rname, env, sz=32): - """ - Gen reg expr and parser - Equivalent to: - PC = ExprId('PC') - reg_pc_str = ['PC'] - reg_pc_expr = [ExprId(x, sz) for x in reg_pc_str] - regpc = reg_info(reg_pc_str, reg_pc_expr) +def gen_reg(reg_name, sz=32): + """Gen reg expr and parser""" + reg_name_lower = reg_name.lower() + reg = m2_expr.ExprId(reg_name, sz) + reginfo = reg_info([reg_name], [reg]) + return reg, reginfo - class bs_rname(m_reg): - reg = regi_rname - bsrname = bs(l=0, cls=(bs_rname,)) +def gen_reg_bs(reg_name, reg_info, base_cls): + """ + Generate: + class bs_reg_name(base_cls): + reg = reg_info + bs_reg_name = bs(l=0, cls=(bs_reg_name,)) """ - rnamel = rname.lower() - r = m2_expr.ExprId(rname, sz) - reg_str = [rname] - reg_expr = [r] - regi = reg_info(reg_str, reg_expr) - # define as global val - cname = "bs_" + rnamel - c = type(cname, (m_reg,), {'reg': regi}) - env[rname] = r - env["regi_" + rnamel] = regi - env[cname] = c - env["bs" + rnamel] = bs(l=0, cls=(c,)) - return r, regi + reg_name_lower = reg_name.lower() + + bs_name = "bs_%s" % reg_name + cls = type(bs_name, base_cls, {'reg': reg_info}) + + bs_obj = bs(l=0, cls=(cls,)) + + return cls, bs_obj def gen_regs(rnames, env, sz=32): @@ -217,125 +229,6 @@ def ast_int2expr(a): return m2_expr.ExprInt(a, 32) - -class ParseAst(object): - - def __init__(self, id2expr, int2expr, default_size=32): - self.id2expr = id2expr - self.int2expr = int2expr - self.default_size = default_size - - def int_from_size(self, size, value): - """Transform a string into ExprInt. - * if @size is None, use provided int2expr - * else, use @size to generate integer - @size: size of int; None if not forced. - @value: string representing an integer - """ - if size is None: - return self.int2expr(value) - else: - return m2_expr.ExprInt(value, size) - - def id_from_size(self, size, value): - """Transform a string into ExprId. - * if @size is None, use provided id2expr - * else, use @size to generate id - @size: size of id; None if not forced. - @value: string representing the id - """ - value = self.id2expr(value) - if isinstance(value, m2_expr.Expr): - return value - if size is None: - size = self.default_size - assert value is not None - return m2_expr.ExprId(asmblock.AsmLabel(value), size) - - def ast_to_expr(self, size, ast): - """Transform a typed ast into a Miasm expression - @size: default size - @ast: typed ast - """ - assert(isinstance(ast, tuple)) - if ast[0] is m2_expr.ExprId: - expr = self.id_from_size(size, ast[1]) - if isinstance(expr, str): - expr = self.id_from_size(size, expr) - elif ast[0] is m2_expr.ExprInt: - expr = self.int_from_size(size, ast[1]) - elif ast[0] is m2_expr.ExprOp: - out = [] - for arg in ast[1]: - if isinstance(arg, tuple): - arg = self.ast_to_expr(size, arg) - out.append(arg) - expr = ast_parse_op(out) - else: - raise TypeError('unknown type') - return expr - - def ast_get_ids(self, ast): - """Retrieve every node of type ExprId in @ast - @ast: typed ast - """ - assert(isinstance(ast, tuple)) - if ast[0] is m2_expr.ExprId: - return set([ast[1]]) - elif ast[0] is m2_expr.ExprInt: - return set() - elif ast[0] is m2_expr.ExprOp: - out = set() - for x in ast[1]: - if isinstance(x, tuple): - out.update(self.ast_get_ids(x)) - return out - raise TypeError('unknown type') - - def _extract_ast_core(self, ast): - assert(isinstance(ast, tuple)) - if ast[0] in [m2_expr.ExprInt, m2_expr.ExprId]: - return ast - elif ast[0] is m2_expr.ExprOp: - out = [] - for arg in ast[1]: - if isinstance(arg, tuple): - arg = self._extract_ast_core(arg) - out.append(arg) - return tuple([ast[0]] + [out]) - else: - raise TypeError('unknown type') - - def extract_ast_core(self, ast): - """ - Trasform an @ast into a Miasm expression. - Use registers size to deduce label and integers sizes. - """ - ast = self._extract_ast_core(ast) - ids = self.ast_get_ids(ast) - ids_expr = [self.id2expr(x) for x in ids] - sizes = set([expr.size for expr in ids_expr - if isinstance(expr, m2_expr.Expr)]) - if not sizes: - size = None - elif len(sizes) == 1: - size = sizes.pop() - else: - # Multiple sizes in ids - raise StopIteration - return self.ast_to_expr(size, ast) - - def __call__(self, ast): - """ - Trasform an @ast into a Miasm expression. - Use registers size to deduce label and integers sizes. - """ - ast = ast[0] - if isinstance(ast, m2_expr.Expr): - return ast - return self.extract_ast_core(ast) - - def neg_int(t): x = -t[0] return x @@ -361,29 +254,154 @@ multop = pyparsing.oneOf('* / %') plusop = pyparsing.oneOf('+ -') -def gen_base_expr(): - variable = pyparsing.Word(pyparsing.alphas + "_$.", - pyparsing.alphanums + "_") - variable.setParseAction(parse_id) - operand = str_int | variable - base_expr = pyparsing.operatorPrecedence(operand, - [("!", 1, pyparsing.opAssoc.RIGHT, parse_op), - (logicop, 2, pyparsing.opAssoc.RIGHT, - parse_op), - (signop, 1, pyparsing.opAssoc.RIGHT, - parse_op), - (multop, 2, pyparsing.opAssoc.LEFT, - parse_op), - (plusop, 2, pyparsing.opAssoc.LEFT, - parse_op), - ]) - return variable, operand, base_expr +########################## +def literal_list(l): + l = l[:] + l.sort() + l = l[::-1] + o = pyparsing.Literal(l[0]) + for x in l[1:]: + o |= pyparsing.Literal(x) + return o + + +def cb_int(t): + assert len(t) == 1 + integer = AstInt(t[0]) + return integer + + +def cb_parse_id(t): + assert len(t) == 1 + reg = t[0] + return AstId(reg) + + +def cb_op_not(t): + tokens = t[0] + assert len(tokens) == 2 + assert tokens[0] == "!" + result = AstOp("!", tokens[1]) + return result + + +def merge_ops(tokens, op): + args = [] + if len(tokens) >= 3: + args = [tokens.pop(0)] + i = 0 + while i < len(tokens): + op_tmp = tokens[i] + arg = tokens[i+1] + i += 2 + if op_tmp != op: + raise ValueError("Bad operator") + args.append(arg) + result = AstOp(op, *args) + return result + + +def cb_op_and(t): + result = merge_ops(t[0], "&") + return result + + +def cb_op_xor(t): + result = merge_ops(t[0], "^") + return result + + +def cb_op_sign(t): + assert len(t) == 1 + op, value = t[0] + return -value + + +def cb_op_div(t): + tokens = t[0] + assert len(tokens) == 3 + assert tokens[1] == "/" + result = AstOp("/", tokens[0], tokens[2]) + return result + + +def cb_op_plusminus(t): + tokens = t[0] + if len(tokens) == 3: + # binary op + assert isinstance(tokens[0], AstNode) + assert isinstance(tokens[2], AstNode) + op, args = tokens[1], [tokens[0], tokens[2]] + elif len(tokens) > 3: + args = [tokens.pop(0)] + i = 0 + while i < len(tokens): + op = tokens[i] + arg = tokens[i+1] + i += 2 + if op == '-': + arg = -arg + elif op == '+': + pass + else: + raise ValueError("Bad operator") + args.append(arg) + op = '+' + else: + raise ValueError("Parsing error") + assert all(isinstance(arg, AstNode) for arg in args) + result = AstOp(op, *args) + return result -variable, operand, base_expr = gen_base_expr() -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) +def cb_op_mul(t): + tokens = t[0] + assert len(tokens) == 3 + assert isinstance(tokens[0], AstNode) + assert isinstance(tokens[2], AstNode) + + # binary op + op, args = tokens[1], [tokens[0], tokens[2]] + result = AstOp(op, *args) + return result + + +integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: int(t[0])) +hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) +hex_int = pyparsing.Combine(hex_word).setParseAction(lambda t: int(t[0], 16)) + +str_int_pos = (hex_int | integer) + +str_int = str_int_pos +str_int.setParseAction(cb_int) + +notop = pyparsing.oneOf('!') +andop = pyparsing.oneOf('&') +orop = pyparsing.oneOf('|') +xorop = pyparsing.oneOf('^') +shiftop = pyparsing.oneOf('>> <<') +rotop = pyparsing.oneOf('<<< >>>') +signop = pyparsing.oneOf('+ -') +mulop = pyparsing.oneOf('*') +plusop = pyparsing.oneOf('+ -') +divop = pyparsing.oneOf('/') + + +variable = pyparsing.Word(pyparsing.alphas + "_$.", pyparsing.alphanums + "_") +variable.setParseAction(cb_parse_id) +operand = str_int | variable + +base_expr = pyparsing.operatorPrecedence(operand, + [(notop, 1, pyparsing.opAssoc.RIGHT, cb_op_not), + (andop, 2, pyparsing.opAssoc.RIGHT, cb_op_and), + (xorop, 2, pyparsing.opAssoc.RIGHT, cb_op_xor), + (signop, 1, pyparsing.opAssoc.RIGHT, cb_op_sign), + (mulop, 2, pyparsing.opAssoc.RIGHT, cb_op_mul), + (divop, 2, pyparsing.opAssoc.RIGHT, cb_op_div), + (plusop, 2, pyparsing.opAssoc.LEFT, cb_op_plusminus), + ]) + default_prio = 0x1337 @@ -656,7 +674,7 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -665,9 +683,14 @@ class m_arg(object): v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + arg = v[0] + expr = self.asm_ast_to_expr(arg, symbol_pool) + self.expr = expr return start, stop + def asm_ast_to_expr(self, arg, symbol_pool): + raise NotImplementedError("Virtual") + class m_reg(m_arg): prio = default_prio @@ -688,7 +711,7 @@ class reg_noarg(object): reg_info = None parser = None - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -697,7 +720,9 @@ class reg_noarg(object): v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + arg = v[0] + expr = self.parses_to_expr(arg, symbol_pool) + self.expr = expr return start, stop def decode(self, v): @@ -1252,7 +1277,7 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, text, mode = None): + def fromstring(cls, text, symbol_pool, mode = None): global total_scans name = re.search('(\S+)', text).groups() if not name: @@ -1291,9 +1316,12 @@ class cls_mn(object): v, start, stop = [None], None, None if start != 0: v, start, stop = [None], None, None - parsers[(i, start_i)][p] = v[0], start, stop - - start, stop = f.fromstring(args_str, parsers[(i, start_i)]) + if v != [None]: + v = f.asm_ast_to_expr(v[0], symbol_pool) + if v is None: + v, start, stop = [None], None, None + parsers[(i, start_i)][p] = v, start, stop + start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) if start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1532,7 +1560,7 @@ class imm_noarg(object): return None return v - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: @@ -1540,7 +1568,7 @@ class imm_noarg(object): e, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - if e is None: + if e == [None]: return None, None assert(isinstance(e, m2_expr.Expr)) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 8ea0f6b1..df419680 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -3,8 +3,8 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmblock as asmblock -from miasm2.core.cpu import gen_base_expr, ParseAst -from miasm2.core.cpu import instruction +from miasm2.core.cpu import instruction, base_expr +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp declarator = {'byte': 8, 'word': 16, @@ -101,6 +101,16 @@ STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 +def asm_ast_to_expr_with_size(arg, symbol_pool, size): + if isinstance(arg, AstId): + return m2_expr.ExprId(arg.name, size) + if isinstance(arg, AstOp): + args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] + return m2_expr.ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return m2_expr.ExprInt(arg.value, size) + return None + def parse_txt(mnemo, attrib, txt, symbol_pool=None): """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where blocks is a list of asm_bloc and symbol_pool the associated AsmSymbolPool @@ -168,16 +178,12 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): expr_list = [] # parser - base_expr = gen_base_expr()[2] - my_var_parser = ParseAst(lambda x: m2_expr.ExprId(x, size), - lambda x: - m2_expr.ExprInt(x, size)) - base_expr.setParseAction(my_var_parser) for element in data_raw: element = element.strip() - element_expr = base_expr.parseString(element)[0] - expr_list.append(element_expr.canonize()) + element_parsed = base_expr.parseString(element)[0] + element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) raw_data.element_size = size @@ -216,7 +222,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, attrib) + instr = mnemo.fromstring(line, symbol_pool, attrib) # replace orphan AsmLabel with labels from symbol_pool replace_orphan_labels(instr, symbol_pool) |