2 files changed, 41 insertions, 1 deletions
diff --git a/description_parser.py b/description_parser.py
new file mode 100755
index 000000000..2615b6368
--- /dev/null
+++ b/description_parser.py
@@ -0,0 +1,90 @@
+"""Extract structured fields from the Markdown description of an issue."""
+import re
+
+from tomlkit import string
+
+# Placeholder returned for every field that cannot be found.
+_NOT_AVAILABLE = "n/a"
+
+
+def remove_comments(description):
+    """Return `description` with every HTML comment (`<!-- ... -->`) removed.
+
+    A comment may span several lines. Matching is non-greedy, so text that
+    sits between two separate comments is kept.
+    """
+    return re.sub(r'<!--.*?-->', '', description, flags=re.DOTALL)
+
+
+def get_headline_content(description, headline):
+    """Return the body of the section introduced by `## <headline>`.
+
+    The section ends at the next line that starts with `## ` or at the end
+    of `description`. `headline` is matched literally, not as a regex.
+
+    Returns a tomlkit multiline string with the stripped section text, or
+    the plain string "n/a" when the headline is not present.
+    """
+    # Anchor the terminating heading to a line start: otherwise the `## `
+    # inside a `### ` sub-heading (or in the middle of a line) would cut the
+    # section short and leave a stray `#` behind.
+    pattern = rf'## {re.escape(headline)}\s+(.*?)(?=^##\s|\Z)'
+
+    match = re.search(pattern, description, re.DOTALL | re.MULTILINE)
+    if match is None:
+        return _NOT_AVAILABLE
+    return string(match.group(1).strip(), multiline=True)
+
+
+def get_bullet_point(description, headline, category):
+    """Return the value of the `<category>: <value>` line below `headline`.
+
+    `headline` and `category` are matched literally. The value has to be on
+    the same line as the category label; surrounding backticks and trailing
+    whitespace are dropped.
+
+    Returns "n/a" when no such line exists before the next line starting
+    with `## `.
+    """
+    pattern = (
+        re.escape(headline)
+        # Skip ahead lazily, but never across a `## ` heading line, so a
+        # field missing from this section is not read from a later one.
+        + r'(?:(?!^##\s)[\s\S])+?'
+        + re.escape(category)
+        # `$` with MULTILINE also accepts a value on the last line without
+        # a trailing newline; `\r` covers CRLF line endings.
+        + r':[ \t]+`?(\S.*?)`?[ \t\r]*$'
+    )
+
+    match = re.search(pattern, description, re.MULTILINE)
+    if match is None:
+        return _NOT_AVAILABLE
+    return match.group(1).strip()
+
+
+def parse_description(desc):
+    """Parse an issue description into a flat dict of fields.
+
+    Args:
+        desc: Raw description text. `None` is treated as an empty
+            description.
+
+    Returns:
+        A dict with the keys "host-os", "host-arch", "qemu-version",
+        "guest-os", "guest-arch", "description", "reproduce" and
+        "additional". Fields that cannot be found are set to "n/a".
+    """
+    # HTML comments are stripped first so their text is never parsed.
+    desc = remove_comments(desc or "")
+
+    return {
+        "host-os": get_bullet_point(desc, "Host", "Operating system"),
+        "host-arch": get_bullet_point(desc, "Host", "Architecture"),
+        "qemu-version": get_bullet_point(desc, "Host", "QEMU version"),
+        "guest-os": get_bullet_point(desc, "Emulated", "Operating system"),
+        "guest-arch": get_bullet_point(desc, "Emulated", "Architecture"),
+        "description": get_headline_content(desc, "Description of problem"),
+        "reproduce": get_headline_content(desc, "Steps to reproduce"),
+        "additional": get_headline_content(desc, "Additional information"),
+    }
diff --git a/downloader.py b/downloader.py
index 444d10c93..5eec7346e 100755
--- a/downloader.py
+++ b/downloader.py
@@ -1,5 +1,6 @@
 from requests import get
 from tomlkit import dumps
+from description_parser import parse_description
 
 project_id = 11167699
 per_page = 100
@@ -24,12 +25,12 @@ def main():
                 "id": i['iid'],
                 "title": i['title'],
                 "state": i['state'],
-                "description": i['description'],
                 "created_at": i['created_at'],
                 "closed_at": i['closed_at'] if i['closed_at'] else "n/a",
                 "labels": i['labels'],
                 "url": i['web_url']
             }
+            issue = issue | parse_description(i['description'])
 
             toml_string = dumps(issue)
             print(toml_string)