summary refs log tree commit diff stats
path: root/words-count/word_count.py
diff options
context:
space:
mode:
author Christian Krinitsin <mail@krinitsin.com> 2025-09-30 19:28:06 +0200
committer Christian Krinitsin <mail@krinitsin.com> 2025-09-30 19:28:06 +0200
commit bf802a572fc3b328dc3f0f2782c6ea9f3b2dd707 (patch)
tree d42e8804c8720070d8a3151432d07dc781eec82d /words-count/word_count.py
parent f2ec263023649e596c5076df32c2d328bc9393d2 (diff)
download emulator-bug-study-main.tar.gz
emulator-bug-study-main.zip
Add script to count average word count of a report HEAD main
Result is used in paper
Diffstat (limited to 'words-count/word_count.py')
-rwxr-xr-x words-count/word_count.py 40
1 files changed, 40 insertions, 0 deletions
diff --git a/words-count/word_count.py b/words-count/word_count.py
new file mode 100755
index 00000000..bbe3a95a
--- /dev/null
+++ b/words-count/word_count.py
@@ -0,0 +1,40 @@
+from os import path, listdir, makedirs
+
+# Directories whose files are counted as reports; relative paths are resolved
+# against the current working directory.
+# NOTE: "../results/scraper/box64" used to be listed in a first assignment
+# that was immediately overwritten, so it was never part of the effective
+# set. It stays excluded here so the computed average is unchanged.
+paths = [
+    "../results/scraper/launchpad-without-comments",
+    "../results/scraper/mailinglist",
+    "../results/scraper/gitlab/issues_text",
+]
+
+def list_files_recursive(directory, basename=False):
+    """Collect every non-directory entry found beneath ``directory``.
+
+    Args:
+        directory: Path of the directory to walk.
+        basename: When true, return only the final path component of each
+            entry instead of its path joined onto ``directory``.
+
+    Returns:
+        A list of paths (or base names). Subdirectories are descended into
+        as they are encountered, with the entries of each directory visited
+        in sorted name order. An empty list is returned when ``directory``
+        is not an existing directory.
+    """
+    result = []
+    if not path.isdir(directory):
+        return result
+    # listdir() returns names in arbitrary order; sorting makes the output
+    # reproducible from run to run.
+    for entry in sorted(listdir(directory)):
+        full_path = path.join(directory, entry)
+        if path.isdir(full_path):
+            # extend() grows the list in place instead of copying everything
+            # collected so far for each subdirectory.
+            result.extend(list_files_recursive(full_path, basename))
+        elif basename:
+            result.append(path.basename(full_path))
+        else:
+            result.append(full_path)
+    return result
+
+def main():
+    """Print the per-directory report counts and the average words per report.
+
+    Every file found beneath the directories in the module-level ``paths``
+    list is treated as one report. A word is any run of non-whitespace
+    characters, so newlines and tabs separate words and repeated spaces do
+    not produce empty "words".
+    """
+    files = []
+    # Named ``report_dir`` rather than ``path`` so the ``os.path`` import is
+    # not shadowed inside this function.
+    for report_dir in paths:
+        # Walk each directory once and reuse the result for both the
+        # per-directory message and the overall file list.
+        new_files = list_files_recursive(report_dir)
+        print(f"{report_dir} has {len(new_files)} reports")
+        files.extend(new_files)
+
+    bug_count = len(files)
+    if bug_count == 0:
+        # Without this guard the average below would raise ZeroDivisionError.
+        print("No reports found; cannot compute an average word count")
+        return
+
+    word_count = 0
+    for report_path in files:
+        # An explicit encoding keeps the result independent of the locale;
+        # undecodable bytes are replaced instead of aborting the whole run.
+        with open(report_path, "r", encoding="utf-8", errors="replace") as file:
+            # split() without arguments splits on any whitespace run and
+            # yields no words for an empty file.
+            word_count += len(file.read().split())
+
+    avg_word_per_bug = word_count / bug_count
+    print(f"Average word per report count: {avg_word_per_bug}")
+
+# Run the word count only when this file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    main()