diff options
| -rwxr-xr-x | description_parser.py | 39 | ||||
| -rwxr-xr-x | downloader.py | 3 |
2 files changed, 41 insertions, 1 deletions
"""Extract structured fields from a Markdown issue description.

Reconstructed from the collapsed diff of ``description_parser.py``
(new file, mode 100755).  The description is expected to contain level-2
Markdown headings (``## ...``) and ``Category: value`` bullet lines; every
field that cannot be found, or is left blank, is reported as ``"n/a"``.
"""
import re

# Marker used for every field that is absent or empty.
_MISSING = "n/a"

# HTML comments may span several lines, hence DOTALL.
_HTML_COMMENT = re.compile(r'<!--.*?-->', re.DOTALL)

# A level-2 heading at the start of a line terminates the current section.
# Anchoring with ^ keeps "### sub-headings" and mid-line "## " inside it.
_NEXT_SECTION = re.compile(r'^##\s', re.MULTILINE)


def remove_comments(description):
    """Return *description* with all ``<!-- ... -->`` comments removed.

    ``None`` or an empty string yields ``""`` so callers can pass a missing
    description straight through.
    """
    if not description:
        return ""
    return _HTML_COMMENT.sub('', description)


def get_headline_content(description, headline):
    """Return the text below the ``## <headline>`` heading.

    The section runs until the next line that starts with ``## `` or the end
    of the text.  *headline* is matched literally.

    Returns:
        A multiline tomlkit string with the stripped section text, or the
        plain string ``"n/a"`` when the heading is missing or its section is
        empty.
    """
    if not description:
        return _MISSING

    pattern = rf'## {re.escape(headline)}\s+(.*?)(?=^##\s|\Z)'
    match = re.search(pattern, description, re.DOTALL | re.MULTILINE)
    if match is None:
        return _MISSING

    content = match.group(1).strip()
    if not content:
        return _MISSING

    # Imported at the point of use so the regex-only helpers in this module
    # stay usable when tomlkit is not installed.
    from tomlkit import string
    return string(content, multiline=True)


def get_bullet_point(description, headline, category):
    """Return the value of the ``<category>: value`` line in a section.

    The section starts at an occurrence of *headline* and ends at the next
    line beginning with ``## `` (or the end of the text), so a field missing
    from one section is never filled from a later one.  Only the remainder of
    the category's own line is taken as the value; one leading and one
    trailing backtick are dropped.  *headline* and *category* are matched
    literally.

    Returns:
        The stripped value, or ``"n/a"`` when the section or field is missing
        or the field is blank.
    """
    if not description:
        return _MISSING

    # [ \t]* (not \s*) so an empty field cannot swallow the following line.
    field = re.compile(rf'{re.escape(category)}:[ \t]*(.*)')

    for heading in re.finditer(re.escape(headline), description):
        section_start = heading.end()
        boundary = _NEXT_SECTION.search(description, section_start)
        section_end = boundary.start() if boundary else len(description)

        match = field.search(description, section_start, section_end)
        if match is None:
            # This occurrence of the headline has no such field; try the next.
            continue

        value = match.group(1).strip()
        if value.startswith('`'):
            value = value[1:]
        if value.endswith('`'):
            value = value[:-1]
        value = value.strip()
        return value if value else _MISSING

    return _MISSING


def parse_description(desc):
    """Parse an issue description into a flat dict of fields.

    HTML comments are removed first.  *desc* may be ``None`` or empty, in
    which case every field is ``"n/a"``.

    Returns:
        A dict with the keys ``host-os``, ``host-arch``, ``qemu-version``,
        ``guest-os``, ``guest-arch``, ``description``, ``reproduce`` and
        ``additional``.
    """
    desc = remove_comments(desc)

    return {
        "host-os": get_bullet_point(desc, "Host", "Operating system"),
        "host-arch": get_bullet_point(desc, "Host", "Architecture"),
        "qemu-version": get_bullet_point(desc, "Host", "QEMU version"),
        "guest-os": get_bullet_point(desc, "Emulated", "Operating system"),
        "guest-arch": get_bullet_point(desc, "Emulated", "Architecture"),
        "description": get_headline_content(desc, "Description of problem"),
        "reproduce": get_headline_content(desc, "Steps to reproduce"),
        "additional": get_headline_content(desc, "Additional information"),
    }


# ---------------------------------------------------------------------------
# Companion change to downloader.py from the same diff.  Only these hunks of
# main() are visible, so the change is recorded here instead of rewritten.
# Line breaks are restored; the original indentation (and the position of the
# one blank context line in the second hunk) is not recoverable from the
# collapsed text.
#
#   diff --git a/downloader.py b/downloader.py
#   index 444d10c93..5eec7346e 100755
#   --- a/downloader.py
#   +++ b/downloader.py
#   @@ -1,5 +1,6 @@
#    from requests import get
#    from tomlkit import dumps
#   +from description_parser import parse_description
#
#    project_id = 11167699
#    per_page = 100
#   @@ -24,12 +25,12 @@ def main():
#    "id": i['iid'],
#    "title": i['title'],
#    "state": i['state'],
#   -"description": i['description'],
#    "created_at": i['created_at'],
#    "closed_at": i['closed_at'] if i['closed_at'] else "n/a",
#    "labels": i['labels'],
#    "url": i['web_url']
#    }
#   +issue = issue | parse_description(i['description'])
#    toml_string = dumps(issue)
#    print(toml_string)
# ---------------------------------------------------------------------------