# Origin: commit b2eaaabfd0fe50d3fab0d00d753193edc572f3c4
# "add issue description parser using regex"
# (Christian Krinitsin, Fri, 16 May 2025 10:48:21 +0200), file description_parser.py.
"""Regex-based parser for Markdown issue descriptions.

The description is expected to contain level-2 headings (``## ...``), some of
which hold ``Category: value`` bullet lines and some of which hold free text.
Anything that cannot be found is reported as the string ``"n/a"``.
"""
import re

try:
    from tomlkit import string
except ImportError:
    # tomlkit is only used to tag free-text sections as multi-line TOML
    # strings.  Without it the plain ``str`` carries the same text.
    def string(raw, *, multiline=False):
        """Stand-in for ``tomlkit.string``: return *raw* unchanged."""
        return raw


# An HTML comment, possibly spanning several lines.
_HTML_COMMENT = re.compile(r'<!--.*?-->', re.DOTALL)

# Start of a level-2 Markdown heading line (requires re.MULTILINE).  The
# trailing ``\s`` in _H2_BOUNDARY keeps ``### sub-headings`` from counting.
_H2_START = r'^ {0,3}##'
_H2_BOUNDARY = _H2_START + r'\s'

_NOT_AVAILABLE = "n/a"


def remove_comments(description):
    """Return *description* with every ``<!-- ... -->`` comment removed.

    Comments may span multiple lines.  An opening ``<!--`` without a closing
    ``-->`` is left untouched.
    """
    return _HTML_COMMENT.sub('', description)


def get_headline_content(description, headline):
    """Return the text below the level-2 heading ``## <headline>``.

    The content runs up to the next level-2 heading or the end of the text
    and is stripped of surrounding whitespace.  Deeper headings (``###``)
    are part of the content.  *headline* is matched literally.

    Returns the content wrapped by ``string(..., multiline=True)``, or
    ``"n/a"`` when the heading does not occur.
    """
    pattern = (
        _H2_START + r'[ \t]+' + re.escape(headline)
        # Lazy whitespace so that a directly following heading is still seen
        # at the start of its line by the look-ahead below.
        + r'\s+?(.*?)'
        + r'(?=' + _H2_BOUNDARY + r'|\Z)'
    )
    match = re.search(pattern, description, re.DOTALL | re.MULTILINE)
    if match is None:
        return _NOT_AVAILABLE
    return string(match.group(1).strip(), multiline=True)


def get_bullet_point(description, headline, category):
    """Return the value of ``<category>: value`` in the *headline* section.

    The search starts at an occurrence of *headline* and does not cross the
    next level-2 heading, so a field missing from one section is not filled
    from a later section.  The value is taken from the same line as the
    category; one pair of surrounding backticks and surrounding whitespace
    are dropped.  *headline* and *category* are matched literally.

    Returns ``"n/a"`` when the field is absent or its value is empty.
    """
    pattern = (
        re.escape(headline)
        # Anything, lazily, but never stepping onto the start of the next
        # level-2 heading.
        + r'(?:(?!' + _H2_BOUNDARY + r').)+?'
        + re.escape(category)
        # Value: rest of the line only, so an empty field cannot swallow the
        # following bullet.  ``\Z`` accepts a last line without a newline.
        + r':[ \t]*`?([^\r\n]*?)`?[ \t]*(?:\r?\n|\Z)'
    )
    match = re.search(pattern, description, re.DOTALL | re.MULTILINE)
    if match is None:
        return _NOT_AVAILABLE
    value = match.group(1).strip()
    return value if value else _NOT_AVAILABLE


def parse_description(desc):
    """Extract the known fields of an issue description into a dict.

    Comments are removed first.  A missing description (``None`` or empty)
    yields ``"n/a"`` for every field.

    Returns a dict with the keys ``host-os``, ``host-arch``, ``qemu-version``,
    ``guest-os``, ``guest-arch``, ``description``, ``reproduce`` and
    ``additional``.
    """
    desc = remove_comments(desc or "")

    return {
        "host-os": get_bullet_point(desc, "Host", "Operating system"),
        "host-arch": get_bullet_point(desc, "Host", "Architecture"),
        "qemu-version": get_bullet_point(desc, "Host", "QEMU version"),
        "guest-os": get_bullet_point(desc, "Emulated", "Operating system"),
        "guest-arch": get_bullet_point(desc, "Emulated", "Architecture"),
        "description": get_headline_content(desc, "Description of problem"),
        "reproduce": get_headline_content(desc, "Steps to reproduce"),
        "additional": get_headline_content(desc, "Additional information"),
    }