about summary refs log tree commit diff stats
path: root/miasm2/core/parse_asm.py
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2/core/parse_asm.py')
-rw-r--r-- miasm2/core/parse_asm.py 319
1 files changed, 180 insertions, 139 deletions
diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py
index 1df8e85b..646ad445 100644
--- a/miasm2/core/parse_asm.py
+++ b/miasm2/core/parse_asm.py
@@ -5,6 +5,7 @@ import re
 import miasm2.expression.expression as m2_expr
 import miasm2.core.asmbloc as asmbloc
 from miasm2.core.cpu import gen_base_expr, parse_ast
+from miasm2.core.cpu import instruction
 
 declarator = {'byte': 8,
               'word': 16,
@@ -19,7 +20,23 @@ size2pck = {8: 'B',
             64: 'Q',
             }
 
-class DirectiveAlign(object):
+EMPTY_RE = re.compile(r'\s*$')
+COMMENT_RE = re.compile(r'\s*;\S*')
+LOCAL_LABEL_RE = re.compile(r'\s*(\.L\S+)\s*:')
+DIRECTIVE_START_RE = re.compile(r'\s*\.')
+DIRECTIVE_RE = re.compile(r'\s*\.(\S+)')
+LABEL_RE = re.compile(r'\s*(\S+)\s*:')
+FORGET_LABEL_RE = re.compile(r'\s*\.LF[BE]\d\s*:')
+
+
+class Directive(object):
+
+    """Base class of the directive markers emitted by the listing parser"""
+
+    pass
+
+class DirectiveAlign(Directive):
+
     """Stand for alignment representation"""
 
     def __init__(self, alignment=1):
@@ -28,16 +45,34 @@ class DirectiveAlign(object):
     def __str__(self):
         return "Alignment %s" % self.alignment
 
-def guess_next_new_label(symbol_pool, gen_label_index=0):
+
+class DirectiveSplit(Directive):
+
+    """Stand for a block split that does not link to the next block"""
+
+    pass
+
+
+class DirectiveDontSplit(Directive):
+
+    """Stand for a block end that stays linked to the next block"""
+
+    pass
+
+
+def guess_next_new_label(symbol_pool):
+    """Generate a new label
+    @symbol_pool: the asm_symbol_pool instance"""
     i = 0
     gen_name = "loc_%.8X"
     while True:
         name = gen_name % i
-        l = symbol_pool.getby_name(name)
-        if l is None:
+        label = symbol_pool.getby_name(name)
+        if label is None:
             return symbol_pool.add_label(name)
         i += 1
 
+
 def replace_expr_labels(expr, symbol_pool, replace_id):
     """Create asm_label of the expression @expr in the @symbol_pool
     Update @replace_id"""
@@ -51,66 +86,69 @@ def replace_expr_labels(expr, symbol_pool, replace_id):
     replace_id[expr] = m2_expr.ExprId(new_lbl, expr.size)
     return replace_id[expr]
 
+
 def replace_orphan_labels(instr, symbol_pool):
     """Link orphan labels used by @instr to the @symbol_pool"""
 
     for i, arg in enumerate(instr.args):
         replace_id = {}
-        arg.visit(lambda e:replace_expr_labels(e,
-                                               symbol_pool,
-                                               replace_id))
+        arg.visit(lambda e: replace_expr_labels(e,
+                                                symbol_pool,
+                                                replace_id))
         instr.args[i] = instr.args[i].replace_expr(replace_id)
 
 
+STATE_NO_BLOC = 0
+STATE_IN_BLOC = 1
+
+
+def parse_txt(mnemo, attrib, txt, symbol_pool=None):
+    """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where
+    blocks is a list of asm_bloc and symbol_pool the associated asm_symbol_pool
+
+    @mnemo: architecture used
+    @attrib: architecture attribute
+    @txt: assembly listing
+    @symbol_pool: (optional) the asm_symbol_pool instance used to handle labels
+    of the listing
+
+    """
 
-def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
     if symbol_pool is None:
         symbol_pool = asmbloc.asm_symbol_pool()
 
-    lines_text = []
-    lines_data = []
-    lines_bss = []
-
     C_NEXT = asmbloc.asm_constraint.c_next
     C_TO = asmbloc.asm_constraint.c_to
 
-    lines = lines_text
+    lines = []
     # parse each line
     for line in txt.split('\n'):
         # empty
-        if re.match(r'\s*$', line):
+        if EMPTY_RE.match(line):
             continue
         # comment
-        if re.match(r'\s*;\S*', line):
+        if COMMENT_RE.match(line):
             continue
         # labels to forget
-        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
-        if r:
+        if FORGET_LABEL_RE.match(line):
             continue
         # label beginning with .L
-        r = re.match(r'\s*(\.L\S+)\s*:', line)
-        if r:
-            l = r.groups()[0]
-            l = symbol_pool.getby_name_create(l)
-            lines.append(l)
+        match_re = LABEL_RE.match(line)
+        if match_re:
+            label_name = match_re.group(1)
+            label = symbol_pool.getby_name_create(label_name)
+            lines.append(label)
             continue
         # directive
-        if re.match(r'\s*\.', line):
-            r = re.match(r'\s*\.(\S+)', line)
-            directive = r.groups()[0]
-            if directive == 'text':
-                lines = lines_text
-                continue
-            if directive == 'data':
-                lines = lines_data
-                continue
-            if directive == 'bss':
-                lines = lines_bss
+        if DIRECTIVE_START_RE.match(line):
+            match_re = DIRECTIVE_RE.match(line)
+            directive = match_re.group(1)
+            if directive in ['text', 'data', 'bss']:
                 continue
             if directive in ['string', 'ascii']:
                 # XXX HACK
                 line = line.replace(r'\n', '\n').replace(r'\r', '\r')
-                raw = line[line.find(r'"') + 1:line.rfind(r"'")]
+                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                 raw = raw.decode('string_escape')
                 if directive == 'string':
                     raw += "\x00"
@@ -119,16 +157,16 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
             if directive == 'ustring':
                 # XXX HACK
                 line = line.replace(r'\n', '\n').replace(r'\r', '\r')
-                raw = line[line.find(r'"') + 1:line.rfind(r"'")] + "\x00"
+                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                 raw = raw.decode('string_escape')
                 raw = "".join([string + '\x00' for string in raw])
                 lines.append(asmbloc.asm_raw(raw))
                 continue
             if directive in declarator:
-                data_raw = line[r.end():].split(' ', 1)[1]
+                data_raw = line[match_re.end():].split(' ', 1)[1]
                 data_raw = data_raw.split(',')
                 size = declarator[directive]
-                data_int = []
+                expr_list = []
 
                 # parser
                 base_expr = gen_base_expr()[2]
@@ -137,29 +175,26 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
                                               m2_expr.ExprInt(x, size))
                 base_expr.setParseAction(my_var_parser)
 
-                for b in data_raw:
-                    b = b.strip()
-                    x = base_expr.parseString(b)[0]
-                    data_int.append(x.canonize())
+                for element in data_raw:
+                    element = element.strip()
+                    element_expr = base_expr.parseString(element)[0]
+                    expr_list.append(element_expr.canonize())
 
-                raw = data_int
-                x = asmbloc.asm_raw(raw)
-                x.element_size = size
-                lines.append(x)
+                raw_data = asmbloc.asm_raw(expr_list)
+                raw_data.element_size = size
+                lines.append(raw_data)
                 continue
             if directive == 'comm':
                 # TODO
                 continue
             if directive == 'split':  # custom command
-                x = asmbloc.asm_raw()
-                x.split = True
-                lines.append(x)
+                lines.append(DirectiveSplit())
                 continue
             if directive == 'dontsplit':  # custom command
-                lines.append(asmbloc.asm_raw())
+                lines.append(DirectiveDontSplit())
                 continue
             if directive == "align":
-                align_value = int(line[r.end():])
+                align_value = int(line[match_re.end():], 0)
                 lines.append(DirectiveAlign(align_value))
                 continue
             if directive in ['file', 'intel_syntax', 'globl', 'local',
@@ -171,11 +206,11 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
             raise ValueError("unknown directive %s" % str(directive))
 
         # label
-        r = re.match(r'\s*(\S+)\s*:', line)
-        if r:
-            l = r.groups()[0]
-            l = symbol_pool.getby_name_create(l)
-            lines.append(l)
+        match_re = LABEL_RE.match(line)
+        if match_re:
+            label_name = match_re.group(1)
+            label = symbol_pool.getby_name_create(label_name)
+            lines.append(label)
             continue
 
         # code
@@ -192,91 +227,97 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
         lines.append(instr)
 
     asmbloc.log_asmbloc.info("___pre asm oki___")
-    # make blocs
-
-    blocs_sections = []
-    bloc_num = 0
-    b = None
-    for lines in [lines_text, lines_data, lines_bss]:
-        state = 0
-        i = 0
-        blocs = []
-        blocs_sections.append(blocs)
-        bloc_to_nlink = None
-        block_may_link = False
-        while i < len(lines):
-            # no current bloc
-            if state == 0:
-                if not isinstance(lines[i], asmbloc.asm_label):
-                    l = guess_next_new_label(symbol_pool)
-                    lines[i:i] = [l]
-                else:
-                    l = lines[i]
-                    b = asmbloc.asm_bloc(l, alignment=mnemo.alignment)
-                    b.bloc_num = bloc_num
-                    bloc_num += 1
-                    blocs.append(b)
-                    state = 1
-                    i += 1
-                    if bloc_to_nlink:
-                        bloc_to_nlink.addto(asmbloc.asm_constraint(b.label,
-                                                                   C_NEXT))
-                        bloc_to_nlink = None
-
-            # in bloc
-            elif state == 1:
-                if isinstance(lines[i], asmbloc.asm_raw):
-                    if hasattr(lines[i], 'split'):
-                        state = 0
-                        block_may_link = False
-                        i += 1
-                    else:
-                        state = 1
-                        block_may_link = True
-                        b.addline(lines[i])
-                        i += 1
-                elif isinstance(lines[i], DirectiveAlign):
-                    b.alignment = lines[i].alignment
-                    i += 1
-                # asmbloc.asm_label
-                elif isinstance(lines[i], asmbloc.asm_label):
-                    if block_may_link:
-                        b.addto(
-                            asmbloc.asm_constraint(lines[i], C_NEXT))
-                        block_may_link = False
-                    state = 0
-                # instruction
-                else:
-                    b.addline(lines[i])
-                    if lines[i].dstflow():
-                        for x in lines[i].getdstflow(symbol_pool):
-                            if not isinstance(x, m2_expr.ExprId):
-                                continue
-                            if x in mnemo.regs.all_regs_ids:
-                                continue
-                            b.addto(asmbloc.asm_constraint(x, C_TO))
-
-                        # TODO XXX redo this really
-
-                        if not lines[i].breakflow() and i + 1 < len(lines):
-                            if isinstance(lines[i + 1], asmbloc.asm_label):
-                                l = lines[i + 1]
-                            else:
-                                l = guess_next_new_label(symbol_pool)
-                                lines[i + 1:i + 1] = [l]
-                        else:
-                            state = 0
-
-                        if lines[i].splitflow():
-                            bloc_to_nlink = b
-                    if not lines[i].breakflow() or lines[i].splitflow():
-                        block_may_link = True
-                    else:
-                        block_may_link = False
+    # make blocks
 
+    cur_block = None
+    state = STATE_NO_BLOC
+    i = 0
+    blocks = []
+    block_to_nlink = None
+    block_may_link = False
+    delayslot = 0
+    while i < len(lines):
+        if delayslot:
+            # Count down the pending delay slots; close the block at zero
+            delayslot -= 1
+            if delayslot == 0:
+                state = STATE_NO_BLOC
+        line = lines[i]
+        # no current block
+        if state == STATE_NO_BLOC:
+            if isinstance(line, DirectiveDontSplit):
+                block_to_nlink = cur_block
+                i += 1
+                continue
+            elif isinstance(line, DirectiveSplit):
+                block_to_nlink = None
+                i += 1
+                continue
+            elif not isinstance(line, asmbloc.asm_label):
+                # First line must be a label. If it's not the case, generate
+                # it.
+                label = guess_next_new_label(symbol_pool)
+                cur_block = asmbloc.asm_bloc(label, alignment=mnemo.alignment)
+            else:
+                cur_block = asmbloc.asm_bloc(line, alignment=mnemo.alignment)
+                i += 1
+            # Generate the current bloc
+            blocks.append(cur_block)
+            state = STATE_IN_BLOC
+            if block_to_nlink:
+                block_to_nlink.addto(
+                    asmbloc.asm_constraint(cur_block.label,
+                                           C_NEXT))
+            block_to_nlink = None
+            continue
+
+        # in block
+        elif state == STATE_IN_BLOC:
+            if isinstance(line, DirectiveSplit):
+                state = STATE_NO_BLOC
+                block_to_nlink = None
+            elif isinstance(line, DirectiveDontSplit):
+                state = STATE_NO_BLOC
+                block_to_nlink = cur_block
+            elif isinstance(line, DirectiveAlign):
+                cur_block.alignment = line.alignment
+            elif isinstance(line, asmbloc.asm_raw):
+                cur_block.addline(line)
+                block_to_nlink = cur_block
+            elif isinstance(line, asmbloc.asm_label):
+                if block_to_nlink:
+                    cur_block.addto(
+                        asmbloc.asm_constraint(line, C_NEXT))
+                    block_to_nlink = None
+                state = STATE_NO_BLOC
+                continue
+            # instruction
+            elif isinstance(line, instruction):
+                cur_block.addline(line)
+                block_to_nlink = cur_block
+                if not line.breakflow():
                     i += 1
+                    continue
+                if delayslot:
+                    raise RuntimeError("Cannot have breakflow in delayslot")
+                if line.dstflow():
+                    for dst in line.getdstflow(symbol_pool):
+                        if not isinstance(dst, m2_expr.ExprId):
+                            continue
+                        if dst in mnemo.regs.all_regs_ids:
+                            continue
+                        cur_block.addto(asmbloc.asm_constraint(dst, C_TO))
+
+                if not line.splitflow():
+                    block_to_nlink = None
+
+                # +1 counts the branch itself: the loop head decrements
+                # before each line and closes the block once it hits zero
+                delayslot = line.delayslot + 1
+            else:
+                raise RuntimeError("unknown class %s" % line.__class__)
+        i += 1
 
-    for block in blocs_sections[0]:
+    for block in blocks:
         asmbloc.log_asmbloc.info(block)
-
-    return blocs_sections, symbol_pool
+    return blocks, symbol_pool