about summary refs log tree commit diff stats
path: root/archive/2025/summer/bsc_gerg/src/terminology/terminology.py
diff options
context:
space:
mode:
authorMartin Fink <martin@finkmartin.com>2025-09-11 09:19:48 +0200
committerGitHub <noreply@github.com>2025-09-11 09:19:48 +0200
commit17af5f6fc0538f615b8612dcd2cb77c2affad63f (patch)
tree76e4c260123b68b93da2417482024ba11f9838ee /archive/2025/summer/bsc_gerg/src/terminology/terminology.py
parenta910d0a3e57f4de47cf2387ac239ae8d0eaca507 (diff)
parent3e5d3ca82193e8e8561beb9ceac9982f376d84e2 (diff)
downloadresearch-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.tar.gz
research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.zip
Merge pull request #10 from walamana/main
Add bsc_gerg
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/terminology/terminology.py')
-rw-r--r--archive/2025/summer/bsc_gerg/src/terminology/terminology.py123
1 files changed, 123 insertions, 0 deletions
diff --git a/archive/2025/summer/bsc_gerg/src/terminology/terminology.py b/archive/2025/summer/bsc_gerg/src/terminology/terminology.py
new file mode 100644
index 000000000..30353ed12
--- /dev/null
+++ b/archive/2025/summer/bsc_gerg/src/terminology/terminology.py
@@ -0,0 +1,123 @@
+import asyncio
+import uuid
+from typing import Optional, Annotated, List, AsyncIterable, Type, Any
+from uuid import UUID
+
+from pydantic import BaseModel, Field
+
+from src.terminology.event import Handler, Event, EventDispatcher, DocumentAdded, TextExtracted, TermExtracted, \
+    OccurrenceResolved, PartialDefinitionGenerated, TermNormalized
+from src.terminology.models import Term, TextSource, Definition
+
+
+class Blackboard(BaseModel):
+    """Container for the terms and text sources collected so far."""
+
+    terms: Annotated[List[Term], Field(default_factory=list)]
+    sources: Annotated[List[TextSource], Field(default_factory=list)]
+
+    def add_term(self, term: str):
+        """Wrap ``term`` in a new ``Term``, append it to ``terms`` and return it."""
+        created = Term(text=term)
+        self.terms.append(created)
+        return created
+
+    def find_term(self, term_str: str):
+        """Return the first stored term whose ``text`` equals ``term_str``, else ``None``."""
+        matches = (candidate for candidate in self.terms if candidate.text == term_str)
+        return next(matches, None)
+
+    def add_text_source(self, text: str):
+        """Store ``text`` as a new ``TextSource`` with a fresh ``uuid4`` id and return it."""
+        created = TextSource(id=uuid.uuid4(), text=text)
+        self.sources.append(created)
+        return created
+
+    def get_text_source(self, id: UUID) -> Optional[TextSource]:
+        """Return the first stored source whose ``id`` matches, else ``None``."""
+        return next((entry for entry in self.sources if entry.id == id), None)
+
+
+class KnowledgeSource(Handler):
+    # Common base for the handler classes in this module: a Handler that
+    # additionally carries a reference to a Blackboard.
+    blackboard: Blackboard
+
+    # NOTE(review): this looks like pydantic's class-based model configuration,
+    # which presumes Handler is a pydantic model — confirm against
+    # src.terminology.event before relying on it.
+    class Config:
+        arbitrary_types_allowed = True
+
+
+
+class TextExtractor(KnowledgeSource):
+    """Knowledge source whose ``handles`` field defaults to ``[DocumentAdded]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [DocumentAdded]),
+    ]
+
+    async def activate(self, event: DocumentAdded) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class TermExtractor(KnowledgeSource):
+    """Knowledge source whose ``handles`` field defaults to ``[TextExtracted]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [TextExtracted]),
+    ]
+
+    async def activate(self, event: TextExtracted) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class TermNormalizer(KnowledgeSource):
+    """Knowledge source whose ``handles`` field defaults to ``[TermExtracted]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [TermExtracted]),
+    ]
+
+    async def activate(self, event: TermExtracted) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class OccurrenceResolver(KnowledgeSource):
+    """Knowledge source whose ``handles`` defaults to ``[TermExtracted, TermNormalized]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [TermExtracted, TermNormalized]),
+    ]
+
+    async def activate(self, event: TermExtracted | TermNormalized) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class DefinitionResolver(KnowledgeSource):
+    """Knowledge source whose ``handles`` defaults to ``[TermExtracted, TermNormalized]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [TermExtracted, TermNormalized]),
+    ]
+
+    async def activate(self, event: Event) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class DefinitionGenerator(KnowledgeSource):
+    """Knowledge source whose ``handles`` field defaults to ``[OccurrenceResolved]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [OccurrenceResolved]),
+    ]
+
+    async def activate(self, event: OccurrenceResolved) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class DefinitionCombiner(KnowledgeSource):
+    """Knowledge source whose ``handles`` defaults to ``[PartialDefinitionGenerated]``."""
+
+    handles: Annotated[
+        List[Type[Event]],
+        Field(default_factory=lambda: [PartialDefinitionGenerated]),
+    ]
+
+    async def activate(self, event: PartialDefinitionGenerated) -> AsyncIterable[Event]:
+        """Placeholder body: produces a single ``None`` and ignores ``event``."""
+        # The yield is what makes this an async generator.
+        yield None
+
+
+class Controller:
+    """Owns a blackboard, an event dispatcher and the registered knowledge sources."""
+
+    def __init__(self):
+        """Create an empty blackboard, an empty source list and a new dispatcher."""
+        self.blackboard = Blackboard()
+        self.knowledge_sources: List[KnowledgeSource] = []
+        self.broker = EventDispatcher()
+
+    def register_knowledge_source(self, knowledge_source: Type[KnowledgeSource]):
+        """Instantiate ``knowledge_source`` on this blackboard and register it.
+
+        Args:
+            knowledge_source: The knowledge-source class (not an instance).
+        """
+        # The blackboard is handed over through the constructor only. Assigning
+        # it on the class as well would mutate the caller's class and share one
+        # controller's blackboard with every other user of that class.
+        instance = knowledge_source(blackboard=self.blackboard)
+        self.knowledge_sources.append(instance)
+        self.broker.register_handler(instance)
+
+    async def emit(self, event: Event):
+        """Emit ``event`` on the dispatcher inside its ``task_group`` context.
+
+        Args:
+            event: The event to hand to the dispatcher.
+        """
+        async with self.broker.task_group:
+            self.broker.emit(event)
+
+    async def analyse_document(self, path: str):
+        """Emit a ``DocumentAdded`` event for ``path``.
+
+        Args:
+            path: Value passed as ``path`` to ``DocumentAdded``.
+        """
+        await self.emit(DocumentAdded(path=path))
+
+    async def start(self, text: str = "Der Schrankenwärter muss das Gleis sichern."):
+        """Emit a ``TextExtracted`` event carrying ``text``.
+
+        Args:
+            text: Text to inject. Defaults to the sample sentence that used to
+                be hard-coded here, so existing ``start()`` calls are unchanged.
+        """
+        await self.emit(TextExtracted(text=text))