diff options
| author | Christian Krinitsin <mail@krinitsin.com> | 2025-07-16 14:52:28 +0200 |
|---|---|---|
| committer | Christian Krinitsin <mail@krinitsin.com> | 2025-07-16 14:52:28 +0200 |
| commit | 2843bb65aeaeb86eb89bf3d9690db61b9dc6306e (patch) | |
| tree | b88d8e4dd702322cd0f27dd3d312b0e99dfdf509 /github/downloader.py | |
| parent | 35f097a31e1c58892a69178b84ddba658efe9c8f (diff) | |
| download | emulator-bug-study-2843bb65aeaeb86eb89bf3d9690db61b9dc6306e.tar.gz emulator-bug-study-2843bb65aeaeb86eb89bf3d9690db61b9dc6306e.zip | |
add a github scraper
Diffstat (limited to '')
| -rwxr-xr-x | github/downloader.py | 44 |
1 files changed, 44 insertions, 0 deletions
"""Download the issues of a GitHub repository and pass each one to ``output_issue``.

Usage: ``downloader.py -r OWNER/NAME``
"""
from requests import get, Response
from output import output_issue
from argparse import ArgumentParser

parser = ArgumentParser(prog='downloader.py')
parser.add_argument('-r', '--repository', required=True, help="Which repository to download the issues from")
args = parser.parse_args()

per_page = 100
# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would block forever.
request_timeout = 30
url = f"https://api.github.com/repos/{args.repository}/issues?per_page={per_page}&state=all"
check_url = f"https://api.github.com/repos/{args.repository}"


def pages_iterator(first: Response, timeout: float = request_timeout):
    """Yield ``first`` and then every following page of a paginated result.

    The next page is taken from the ``next`` entry of ``Response.links``;
    iteration stops at the first response that has no such entry.

    Args:
        first: Response of the request for the first page.
        timeout: Seconds to wait for each follow-up request.

    Yields:
        Each page's ``Response``, in order, after its status was checked.

    Raises:
        requests.HTTPError: If any page came back with an error status.
    """
    current = first
    while True:
        # Check before yielding so callers never see a failed page.
        current.raise_for_status()
        yield current

        next_link = current.links.get('next')
        if not next_link:
            return
        current = get(url=next_link.get('url'), timeout=timeout)


def main():
    """Fetch all issues of the requested repository and output them one by one.

    Raises:
        requests.HTTPError: If the repository lookup or any issue page fails.
    """
    # Query the repository itself first so an unknown repository fails
    # before any issue page is requested.
    check = get(check_url, timeout=request_timeout)
    check.raise_for_status()

    first_page = get(url, timeout=request_timeout)
    for page_number, response in enumerate(pages_iterator(first_page), start=1):
        print(f"Current page: {page_number}")

        for entry in response.json():
            # GitHub's issues endpoint also lists pull requests; they are
            # marked by a "pull_request" key and are not wanted here.
            if "pull_request" in entry:
                continue

            issue = {
                "id": entry['number'],
                "title": entry['title'],
                "labels": [label['name'] for label in entry['labels']],
                "description": entry['body'],
            }

            output_issue(issue)


if __name__ == "__main__":
    main()