about summary refs log tree commit diff stats
path: root/src/miasm/core/parse_asm.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/miasm/core/parse_asm.py')
-rw-r--r-- src/miasm/core/parse_asm.py 288
1 files changed, 288 insertions, 0 deletions
diff --git a/src/miasm/core/parse_asm.py b/src/miasm/core/parse_asm.py
new file mode 100644
index 00000000..79ef416d
--- /dev/null
+++ b/src/miasm/core/parse_asm.py
@@ -0,0 +1,288 @@
+#-*- coding:utf-8 -*-
+import re
+import codecs
+from builtins import range
+
+from miasm.core.utils import force_str
+from miasm.expression.expression import ExprId, ExprInt, ExprOp, LocKey
+import miasm.core.asmblock as asmblock
+from miasm.core.cpu import instruction, base_expr
+from miasm.core.asm_ast import AstInt, AstId, AstOp
+
+# Data directive name -> size in bits of one element
+declarator = {
+    'byte': 8,
+    'word': 16,
+    'dword': 32,
+    'qword': 64,
+    'long': 32,
+}
+
+# Element size in bits -> one-letter code
+# (presumably a struct format character; not referenced in this file)
+size2pck = {
+    8: 'B',
+    16: 'H',
+    32: 'I',
+    64: 'Q',
+}
+
+# Patterns used to classify a line of the listing. They are all applied
+# with .match(), i.e. anchored at the beginning of the line.
+
+# Only blanks
+EMPTY_RE = re.compile(r'\s*$')
+# First non-blank character is ';'
+COMMENT_RE = re.compile(r'\s*;\S*')
+# Any "name:" label; group 1 is the name
+LABEL_RE = re.compile(r'\s*(\S+)\s*:')
+# ".Lxxx:" label; group 1 is the name (not referenced in this file)
+LOCAL_LABEL_RE = re.compile(r'\s*(\.L\S+)\s*:')
+# ".LFB<digit>:" / ".LFE<digit>:" labels, dropped by the parser
+FORGET_LABEL_RE = re.compile(r'\s*\.LF[BE]\d\s*:')
+# First non-blank character is '.'
+DIRECTIVE_START_RE = re.compile(r'\s*\.')
+# ".name" directive; group 1 is the name without the dot
+DIRECTIVE_RE = re.compile(r'\s*\.(\S+)')
+
+
+class Directive(object):
+
+    """Base class of the directives kept among the parsed lines"""
+
+    pass
+
+
+class DirectiveAlign(Directive):
+
+    """Stand for alignment representation
+
+    @alignment: value stored on the directive; parse_txt copies it to the
+    `alignment` attribute of the block being built
+    """
+
+    def __init__(self, alignment=1):
+        self.alignment = alignment
+
+    def __str__(self):
+        return "Alignment %s" % self.alignment
+
+
+class DirectiveSplit(Directive):
+
+    """Stand for the custom `.split` directive
+
+    In parse_txt, it closes the current block without linking it to the
+    next one.
+    """
+
+    pass
+
+
+class DirectiveDontSplit(Directive):
+
+    """Stand for the custom `.dontsplit` directive
+
+    In parse_txt, it closes the current block and links it to the next one
+    with a c_next constraint.
+    """
+
+    pass
+
+
+# States of the block-building loop: no block is open / a block is open
+STATE_NO_BLOC, STATE_IN_BLOC = 0, 1
+
+
+def asm_ast_to_expr_with_size(arg, loc_db, size):
+    """Translate a parsed assembly AST node into an expression
+
+    @arg: AstId, AstOp or AstInt node
+    @loc_db: not read here, only handed to the recursive calls
+    @size: size given to the ExprId / ExprInt leaves
+
+    Returns None when @arg is none of the handled node classes.
+    """
+    if isinstance(arg, AstId):
+        name = force_str(arg.name)
+        return ExprId(name, size)
+    elif isinstance(arg, AstOp):
+        # Convert the operands first, each with the same size
+        operands = []
+        for sub_arg in arg.args:
+            operands.append(asm_ast_to_expr_with_size(sub_arg, loc_db, size))
+        return ExprOp(arg.op, *operands)
+    elif isinstance(arg, AstInt):
+        return ExprInt(arg.value, size)
+    else:
+        return None
+
+def parse_txt(mnemo, attrib, txt, loc_db):
+    """Parse an assembly listing. Returns an AsmCFG instance
+
+    @mnemo: architecture used
+    @attrib: architecture attribute
+    @txt: assembly listing
+    @loc_db: the LocationDB instance used to handle labels of the listing
+
+    Raises ValueError on an unknown or malformed directive, and
+    RuntimeError when a flow-breaking instruction sits in a delay slot or
+    when an element of an unexpected class is met while building blocks.
+    """
+
+    C_NEXT = asmblock.AsmConstraint.c_next
+    C_TO = asmblock.AsmConstraint.c_to
+
+    # First pass: turn each text line into a LocKey (label), an AsmRaw
+    # (data), a Directive or an instruction
+    lines = []
+    for line in txt.split('\n'):
+        # empty
+        if EMPTY_RE.match(line):
+            continue
+        # comment
+        if COMMENT_RE.match(line):
+            continue
+        # labels to forget
+        if FORGET_LABEL_RE.match(line):
+            continue
+        # label; tested before directives so that a label starting with
+        # a dot (".L1:") is kept as a label
+        match_re = LABEL_RE.match(line)
+        if match_re:
+            label_name = match_re.group(1)
+            label = loc_db.get_or_create_name_location(label_name)
+            lines.append(label)
+            continue
+        # directive
+        if DIRECTIVE_START_RE.match(line):
+            match_re = DIRECTIVE_RE.match(line)
+            if match_re is None:
+                # A dot with no name after it
+                raise ValueError("unknown directive %s" % line.strip())
+            directive = match_re.group(1)
+            if directive in ['text', 'data', 'bss']:
+                continue
+            if directive in ['string', 'ascii']:
+                # XXX HACK
+                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
+                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
+                raw = codecs.escape_decode(raw)[0]
+                if directive == 'string':
+                    raw += b"\x00"
+                lines.append(asmblock.AsmRaw(raw))
+                continue
+            if directive == 'ustring':
+                # XXX HACK
+                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
+                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
+                raw = codecs.escape_decode(raw)[0]
+                # Insert a null byte after each byte of the decoded string
+                out = b''.join(
+                    raw[i:i + 1] + b'\x00' for i in range(len(raw))
+                )
+                lines.append(asmblock.AsmRaw(out))
+                continue
+            if directive in declarator:
+                # Operands are what follows the directive name; strip()
+                # accepts any blank separator, including a tab
+                data_raw = line[match_re.end():].strip()
+                size = declarator[directive]
+                expr_list = []
+
+                # parser
+
+                for element in data_raw.split(','):
+                    element = element.strip()
+                    element_parsed = base_expr.parseString(element)[0]
+                    element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size)
+                    expr_list.append(element_expr)
+
+                raw_data = asmblock.AsmRaw(expr_list)
+                raw_data.element_size = size
+                lines.append(raw_data)
+                continue
+            if directive == 'comm':
+                # TODO
+                continue
+            if directive == 'split':  # custom command
+                lines.append(DirectiveSplit())
+                continue
+            if directive == 'dontsplit':  # custom command
+                lines.append(DirectiveDontSplit())
+                continue
+            if directive == "align":
+                align_value = int(line[match_re.end():], 0)
+                lines.append(DirectiveAlign(align_value))
+                continue
+            # Directives accepted but ignored
+            if directive in ['file', 'intel_syntax', 'globl', 'local',
+                             'type', 'size', 'ident', 'section']:
+                continue
+            if directive[0:4] == 'cfi_':
+                continue
+
+            raise ValueError("unknown directive %s" % directive)
+
+        # code: drop a trailing ';' comment, then every surrounding blank
+        # (a tab followed by spaces must not survive the strip)
+        line = line.partition(';')[0]
+        line = line.strip()
+        instr = mnemo.fromstring(line, loc_db, attrib)
+        lines.append(instr)
+
+    asmblock.log_asmblock.info("___pre asm oki___")
+    # make asmcfg
+
+    cur_block = None
+    state = STATE_NO_BLOC
+    i = 0
+    asmcfg = asmblock.AsmCFG(loc_db)
+    # Block which must receive a C_NEXT constraint to the next block
+    block_to_nlink = None
+    # Loop iterations left before the current block is closed; armed by a
+    # flow-breaking instruction
+    delayslot = 0
+    while i < len(lines):
+        if delayslot:
+            delayslot -= 1
+            if delayslot == 0:
+                state = STATE_NO_BLOC
+        line = lines[i]
+        # no current block
+        if state == STATE_NO_BLOC:
+            if isinstance(line, DirectiveDontSplit):
+                block_to_nlink = cur_block
+                i += 1
+                continue
+            elif isinstance(line, DirectiveSplit):
+                block_to_nlink = None
+                i += 1
+                continue
+            elif not isinstance(line, LocKey):
+                # First line must be a label. If it's not the case, generate
+                # it.
+                # i is not advanced: the same element is handled again
+                # once the block is open
+                loc = loc_db.add_location()
+                cur_block = asmblock.AsmBlock(loc_db, loc, alignment=mnemo.alignment)
+            else:
+                cur_block = asmblock.AsmBlock(loc_db, line, alignment=mnemo.alignment)
+                i += 1
+            # Generate the current block
+            asmcfg.add_block(cur_block)
+            state = STATE_IN_BLOC
+            if block_to_nlink:
+                block_to_nlink.addto(
+                    asmblock.AsmConstraint(
+                        cur_block.loc_key,
+                        C_NEXT
+                    )
+                )
+            block_to_nlink = None
+            continue
+
+        # in block
+        elif state == STATE_IN_BLOC:
+            if isinstance(line, DirectiveSplit):
+                state = STATE_NO_BLOC
+                block_to_nlink = None
+            elif isinstance(line, DirectiveDontSplit):
+                state = STATE_NO_BLOC
+                block_to_nlink = cur_block
+            elif isinstance(line, DirectiveAlign):
+                cur_block.alignment = line.alignment
+            elif isinstance(line, asmblock.AsmRaw):
+                cur_block.addline(line)
+                block_to_nlink = cur_block
+            elif isinstance(line, LocKey):
+                # A label closes the current block; i is not advanced so
+                # that the label opens the next block
+                if block_to_nlink:
+                    cur_block.addto(
+                        asmblock.AsmConstraint(line, C_NEXT)
+                    )
+                    block_to_nlink = None
+                state = STATE_NO_BLOC
+                continue
+            # instruction
+            elif isinstance(line, instruction):
+                cur_block.addline(line)
+                block_to_nlink = cur_block
+                if not line.breakflow():
+                    i += 1
+                    continue
+                if delayslot:
+                    raise RuntimeError("Cannot have breakflow in delayslot")
+                if line.dstflow():
+                    for dst in line.getdstflow(loc_db):
+                        # Only ExprId destinations which are not registers
+                        # give a C_TO constraint
+                        if not isinstance(dst, ExprId):
+                            continue
+                        if dst in mnemo.regs.all_regs_ids:
+                            continue
+                        cur_block.addto(asmblock.AsmConstraint(dst.name, C_TO))
+
+                if not line.splitflow():
+                    block_to_nlink = None
+
+                delayslot = line.delayslot + 1
+            else:
+                raise RuntimeError("unknown class %s" % line.__class__)
+        i += 1
+
+    for block in asmcfg.blocks:
+        # Fix multiple constraints
+        block.fix_constraints()
+
+        # Log block
+        asmblock.log_asmblock.info(block)
+    return asmcfg