"""Tests for term extraction with ``OpenAIExtractor``."""

from unittest import TestCase

from src.knowledge.openai.extract import OpenAIExtractor
from src.terminology.event import TextExtracted, TermExtracted, OccurrenceResolved
from src.terminology.terminology import Blackboard
from tests.util import collect_async


class TestTermExtractor(TestCase):
    """Checks the events emitted by ``OpenAIExtractor`` for a sample text."""

    def test_common(self):
        """Activate the extractor on a German sample text and verify its events.

        The test fails when any expected term has no ``TermExtracted`` event
        (compared case-insensitively), or when an extracted term has no
        ``OccurrenceResolved`` event.
        """
        blackboard = Blackboard(terms=[], sources=[])
        extractor = OpenAIExtractor(blackboard=blackboard)

        # Named ``text`` rather than ``input`` so the builtin is not shadowed.
        text = """Einseitig gerichtete Sprecheinrichtung verwenden\n Aufträge dürfen über einseitig gerichtete Sprecheinrichtungen gegeben werden, wenn dies im Einzelfall nicht verboten ist und der Empfänger die Ausführung melden muss oder der Auftraggeber die Ausführung selbst erkennen kann. 
Meldungen dürfen über einseitig gerichtete Sprecheinrichtungen nicht gegeben werden."""

        expected = ["Einseitig gerichtete Sprecheinrichtung", "Aufträge", "Empfänger", "Auftraggeber", "Meldungen"]

        initial_event = TextExtracted(text=text)
        actual_events = collect_async(extractor.activate(initial_event))

        # Exact type match is kept on purpose: the event class hierarchy is
        # not visible here, so isinstance() could change which events match.
        extracted_terms = {
            event.term.text.lower()
            for event in actual_events
            if type(event) is TermExtracted
        }

        missing_terms = [term for term in expected if term.lower() not in extracted_terms]
        if missing_terms:
            # Sorted so the failure message is stable between runs.
            self.fail(f"Missing terms [{', '.join(missing_terms)}] in extracted events ([{', '.join(sorted(extracted_terms))}]).")

        resolved_terms = {
            event.term.text.lower()
            for event in actual_events
            if type(event) is OccurrenceResolved
        }

        # Every term in ``extracted_terms`` comes from a TermExtracted event,
        # so only the OccurrenceResolved side needs checking. Iterating in
        # sorted order makes the first reported term deterministic.
        for term in sorted(extracted_terms):
            if term not in resolved_terms:
                self.fail(f"Missing OccurrenceResolved event for term {term}.")