diff options
| author | Martin Fink <martin@finkmartin.com> | 2025-09-11 09:19:48 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-11 09:19:48 +0200 |
| commit | 17af5f6fc0538f615b8612dcd2cb77c2affad63f (patch) | |
| tree | 76e4c260123b68b93da2417482024ba11f9838ee /archive/2025/summer/bsc_gerg/src/terminology/terminology.py | |
| parent | a910d0a3e57f4de47cf2387ac239ae8d0eaca507 (diff) | |
| parent | 3e5d3ca82193e8e8561beb9ceac9982f376d84e2 (diff) | |
| download | research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.tar.gz research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.zip | |
Merge pull request #10 from walamana/main
Add bsc_gerg
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/terminology/terminology.py')
| -rw-r--r-- | archive/2025/summer/bsc_gerg/src/terminology/terminology.py | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/archive/2025/summer/bsc_gerg/src/terminology/terminology.py b/archive/2025/summer/bsc_gerg/src/terminology/terminology.py new file mode 100644 index 000000000..30353ed12 --- /dev/null +++ b/archive/2025/summer/bsc_gerg/src/terminology/terminology.py @@ -0,0 +1,123 @@ +import asyncio +import uuid +from typing import Optional, Annotated, List, AsyncIterable, Type, Any +from uuid import UUID + +from pydantic import BaseModel, Field + +from src.terminology.event import Handler, Event, EventDispatcher, DocumentAdded, TextExtracted, TermExtracted, \ + OccurrenceResolved, PartialDefinitionGenerated, TermNormalized +from src.terminology.models import Term, TextSource, Definition + + +class Blackboard(BaseModel): + terms: Annotated[List[Term], Field(default_factory=list)] + sources: Annotated[List[TextSource], Field(default_factory=list)] + + def add_term(self, term: str): + term = Term(text=term) + self.terms.append(term) + return term + + def find_term(self, term_str: str): + for term in self.terms: + if term.text == term_str: + return term + + def add_text_source(self, text: str): + source = TextSource(id=uuid.uuid4(), text=text) + self.sources.append(source) + return source + + def get_text_source(self, id: UUID) -> Optional[TextSource]: + for source in self.sources: + if source.id == id: + return source + return None + + +class KnowledgeSource(Handler): + blackboard: Blackboard + + class Config: + arbitrary_types_allowed = True + + + +class TextExtractor(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [DocumentAdded])] + + async def activate(self, event: DocumentAdded) -> AsyncIterable[Event]: + yield + + +class TermExtractor(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [TextExtracted])] + + async def activate(self, event: TextExtracted) -> AsyncIterable[Event]: + yield + + +class TermNormalizer(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [TermExtracted])] + + async def activate(self, event: TermExtracted) -> AsyncIterable[Event]: + yield + + +class OccurrenceResolver(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [TermExtracted, TermNormalized])] + + async def activate(self, event: TermExtracted | TermNormalized) -> AsyncIterable[Event]: + yield + + +class DefinitionResolver(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [TermExtracted, TermNormalized])] + + async def activate(self, event: Event) -> AsyncIterable[Event]: + yield + + +class DefinitionGenerator(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [OccurrenceResolved])] + + async def activate(self, event: OccurrenceResolved) -> AsyncIterable[Event]: + yield + + +class DefinitionCombiner(KnowledgeSource): + handles: Annotated[List[Type[Event]], Field(default_factory=lambda: [PartialDefinitionGenerated])] + + async def activate(self, event: PartialDefinitionGenerated) -> AsyncIterable[Event]: + yield + + +class Controller: + + def __init__(self): + self.blackboard = Blackboard() + self.knowledge_sources = [] + self.broker = EventDispatcher() + + def register_knowledge_source(self, knowledge_source: Type[KnowledgeSource]): + knowledge_source.blackboard = self.blackboard + instance = knowledge_source(blackboard=self.blackboard) + self.knowledge_sources.append(instance) + self.broker.register_handler(instance) + + async def emit(self, event: Event): + async with self.broker.task_group: + self.broker.emit(event) + + async def analyse_document(self, path: str): + async with self.broker.task_group: + self.broker.emit( + DocumentAdded(path=path) + ) + + async def start(self): + async with self.broker.task_group: + self.broker.emit( + TextExtracted(text="Der Schrankenwärter muss das Gleis sichern.") + ) |