summary refs log tree commit diff stats
path: root/words-count/word_count.py
blob: bbe3a95a9d9a7a71c1b864c8e9f13c547843efeb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from os import path, listdir, makedirs

# Directories scanned for report files, relative to the current working
# directory.
# NOTE(review): "../results/scraper/box64" was listed in an earlier assignment
# that was immediately overwritten by this list, so it was not counted; add it
# back here if it should be included.
paths = [
    "../results/scraper/launchpad-without-comments",
    "../results/scraper/mailinglist",
    "../results/scraper/gitlab/issues_text",
]

def list_files_recursive(directory, basename = False):
    """Collect every file found under *directory*, descending into subdirectories.

    Args:
        directory: Path of the directory to scan.
        basename: When true, return bare file names instead of joined paths.

    Returns:
        A list with one entry per non-directory entry found. The list is empty
        when *directory* is not an existing directory.
    """
    # Guard clause: nothing to scan for a missing path or a plain file.
    if not path.isdir(directory):
        return []

    collected = []
    for name in listdir(directory):
        candidate = path.join(directory, name)
        if path.isdir(candidate):
            # Subdirectory: its files are appended in place, keeping scan order.
            collected.extend(list_files_recursive(candidate, basename))
            continue
        collected.append(path.basename(candidate) if basename else candidate)
    return collected

def main():
    """Print per-directory report counts and the average words per report.

    Every file found under the directories listed in ``paths`` counts as one
    report. A word is a maximal run of non-whitespace characters. If no files
    are found, a message is printed instead of an average.
    """
    files = []
    # `directory` rather than `path`, so the imported os.path is not shadowed.
    for directory in paths:
        new_files = list_files_recursive(directory)
        print(f"{directory} has {len(new_files)} reports")
        # Reuse the listing instead of walking the directory a second time.
        files.extend(new_files)

    bug_count = len(files)
    if bug_count == 0:
        # Avoid a ZeroDivisionError when none of the directories has files.
        print("No reports found; cannot compute an average word count")
        return

    word_count = 0
    for file_path in files:
        # Explicit UTF-8 keeps the result independent of the locale; replacing
        # undecodable bytes stops one bad file from aborting the whole count.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            # split() with no argument splits on any whitespace run, so
            # newlines and tabs separate words and an empty file counts as 0.
            word_count += len(file.read().split())

    avg_word_per_bug = word_count / bug_count
    print(f"Average word per report count: {avg_word_per_bug}")

# Run the word count only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()