summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Christian Krinitsin <mail@krinitsin.com> 2025-07-16 14:52:28 +0200
committer Christian Krinitsin <mail@krinitsin.com> 2025-07-16 14:52:28 +0200
commit 2843bb65aeaeb86eb89bf3d9690db61b9dc6306e (patch)
tree b88d8e4dd702322cd0f27dd3d312b0e99dfdf509
parent 35f097a31e1c58892a69178b84ddba658efe9c8f (diff)
download emulator-bug-study-2843bb65aeaeb86eb89bf3d9690db61b9dc6306e.tar.gz
emulator-bug-study-2843bb65aeaeb86eb89bf3d9690db61b9dc6306e.zip
add a github scraper
Diffstat (limited to '')
-rw-r--r-- github/.gitignore 1
-rwxr-xr-x github/downloader.py 44
-rwxr-xr-x github/output.py 16
3 files changed, 61 insertions, 0 deletions
diff --git a/github/.gitignore b/github/.gitignore
new file mode 100644
index 00000000..11f8666c
--- /dev/null
+++ b/github/.gitignore
@@ -0,0 +1 @@
+issues
diff --git a/github/downloader.py b/github/downloader.py
new file mode 100755
index 00000000..abebfae3
--- /dev/null
+++ b/github/downloader.py
@@ -0,0 +1,44 @@
+from requests import get, Response
+from output import output_issue
+from argparse import ArgumentParser
+
+parser = ArgumentParser(prog='downloader.py')
+parser.add_argument('-r', '--repository', required=True, help="Which repository to download the issues from")
+args = parser.parse_args()
+
+per_page = 100
+url = f"https://api.github.com/repos/{args.repository}/issues?per_page={per_page}&state=all"
+check_url = f"https://api.github.com/repos/{args.repository}"
+
+def pages_iterator(first : Response):
+    current = first
+    while current.links.get('next'):
+        current.raise_for_status()
+        yield current
+        current = get(url = current.links.get('next').get('url'))
+    current.raise_for_status()
+    yield current
+
+def main():
+    check = get(check_url)
+    check.raise_for_status()
+
+    for index, response in enumerate(pages_iterator(get(url))):
+        print(f"Current page: {index+1}")
+
+        data = response.json()
+        for i in data:
+            if "pull_request" in i:
+                continue
+
+            issue = {
+                "id": i['number'],
+                "title": i['title'],
+                "labels": [label['name'] for label in i['labels']],
+                "description": i['body'],
+            }
+
+            output_issue(issue)
+
+if __name__ == "__main__":
+    main()
diff --git a/github/output.py b/github/output.py
new file mode 100755
index 00000000..6d64d71d
--- /dev/null
+++ b/github/output.py
@@ -0,0 +1,16 @@
+from os import path, makedirs
+
+def write_file(file_path : str, string : str) -> None:
+    makedirs(path.dirname(file_path), exist_ok = True)
+    with open(file_path, "w") as file:
+        file.write(string)
+
+def output_issue(issue : dict) -> None:
+    try:
+        if 'documentation' in issue['labels']:
+            write_file(f"issues/documentation/{issue['id']}", issue['title'] + '\n' + (issue['description'] or ""))
+        else:
+            write_file(f"issues/{issue['id']}", issue['title'] + '\n' + (issue['description'] or ""))
+    except TypeError:
+        print(f"error with bug {issue['id']}")
+        exit()