diff options
| author | Martin Fink <martin@finkmartin.com> | 2025-09-11 09:19:48 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-11 09:19:48 +0200 |
| commit | 17af5f6fc0538f615b8612dcd2cb77c2affad63f (patch) | |
| tree | 76e4c260123b68b93da2417482024ba11f9838ee /archive/2025/summer/bsc_gerg/src/terminology/session.py | |
| parent | a910d0a3e57f4de47cf2387ac239ae8d0eaca507 (diff) | |
| parent | 3e5d3ca82193e8e8561beb9ceac9982f376d84e2 (diff) | |
| download | research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.tar.gz research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.zip | |
Merge pull request #10 from walamana/main
Add bsc_gerg
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/terminology/session.py')
| -rw-r--r-- | archive/2025/summer/bsc_gerg/src/terminology/session.py | 108 |
1 files changed, 108 insertions, 0 deletions
# Origin: archive/2025/summer/bsc_gerg/src/terminology/session.py
# (added as a new file, mode 100644, blob 534c69d6e, by the commit listed above).
"""Sessions that wire knowledge sources into terminology controllers.

A ``Session`` carries a ``KnowledgeSourcePolicy`` and, for every request,
builds a fresh ``Controller``, registers the knowledge sources selected by
the policy, emits the triggering event, and returns the controller's
blackboard. ``SessionManager`` keeps the created sessions in a class-level
registry keyed by session id.
"""

import uuid
from typing import Annotated, ClassVar, Optional
from uuid import UUID

from pydantic import BaseModel, Field

from src.knowledge.document import Pdf2Text
from src.knowledge.extract import CValue
from src.knowledge.openai.definition.combiner import OpenAIDefinitionCombiner
from src.knowledge.openai.definition.generator import OpenAIDefinitionGenerator
from src.knowledge.openai.extract import OpenAIExtractor
from src.knowledge.openai.lemmatize import OpenAILemmatizer
from src.knowledge.resolver import CSVDefinitionResolver
from src.terminology.event import DocumentAdded, TextExtracted
from src.terminology.terminology import Controller, Blackboard


async def _emit_text(
    controller: Controller, text: str, context: Optional[str]
) -> Blackboard:
    """Feed ``text`` into an already configured controller.

    Shared tail of ``Session.retrieve_term_definition`` and
    ``Session.extract_terminology``.

    Args:
        controller: Controller whose knowledge sources are already registered.
        text: Text carried by the emitted ``TextExtracted`` event.
        context: Optional extra text; when not ``None`` it is added to the
            blackboard as a text source before the event is emitted.

    Returns:
        The controller's blackboard after ``emit`` has been awaited.
    """
    # TODO: Make proper use of context!!!
    if context is not None:
        controller.blackboard.add_text_source(context)

    await controller.emit(TextExtracted(text=text))

    return controller.blackboard


class KnowledgeSourcePolicy(BaseModel):
    """Selects which knowledge sources a session registers."""

    # When True the OpenAI-backed sources are registered: extractor and
    # lemmatizer replace CValue, and the definition generator and combiner
    # are added next to the CSV resolver.
    use_llm: bool = False


class Session(BaseModel):
    """A processing session bound to one ``KnowledgeSourcePolicy``.

    Every public ``async`` method creates its own ``Controller``; no
    controller is stored on the session itself.
    """

    # NOTE(review): no arbitrary-typed field is visible on this model --
    # confirm whether this setting is still required.
    model_config = {
        "arbitrary_types_allowed": True,
    }

    # Generated with uuid.uuid4 when the caller does not supply one.
    id: Annotated[UUID, Field(default_factory=uuid.uuid4)]
    policy: KnowledgeSourcePolicy

    def setup_controller_document_processing(self, controller: Controller) -> Controller:
        """Register the ``Pdf2Text`` knowledge source and return ``controller``."""
        controller.register_knowledge_source(Pdf2Text)
        return controller

    def setup_controller_term_extraction(self, controller: Controller) -> Controller:
        """Register the term-extraction sources chosen by the policy.

        With ``policy.use_llm`` set, ``OpenAIExtractor`` and
        ``OpenAILemmatizer`` are registered; otherwise only ``CValue`` is.

        Returns:
            The same ``controller`` instance that was passed in.
        """
        if self.policy.use_llm:
            controller.register_knowledge_source(OpenAIExtractor)
            controller.register_knowledge_source(OpenAILemmatizer)
        else:
            controller.register_knowledge_source(CValue)
        return controller

    def setup_controller_definition_generation(self, controller: Controller) -> Controller:
        """Register the definition sources chosen by the policy.

        ``CSVDefinitionResolver`` is always registered. With
        ``policy.use_llm`` set, ``OpenAIDefinitionGenerator`` and
        ``OpenAIDefinitionCombiner`` are registered as well.

        Returns:
            The same ``controller`` instance that was passed in.
        """
        controller.register_knowledge_source(CSVDefinitionResolver)
        if self.policy.use_llm:
            controller.register_knowledge_source(OpenAIDefinitionGenerator)
            controller.register_knowledge_source(OpenAIDefinitionCombiner)
        return controller

    async def process_document(self, file_path: str) -> Blackboard:
        """Run document processing, term extraction and definition generation.

        Args:
            file_path: Path passed on in the ``DocumentAdded`` event.

        Returns:
            The blackboard of the controller created for this call.
        """
        controller = Controller()
        self.setup_controller_document_processing(controller)
        self.setup_controller_term_extraction(controller)
        self.setup_controller_definition_generation(controller)

        await controller.emit(DocumentAdded(path=file_path))

        return controller.blackboard

    async def retrieve_term_definition(self, text: str, context: Optional[str] = None) -> Blackboard:
        """Run term extraction and definition generation on ``text``.

        Args:
            text: Text carried by the emitted ``TextExtracted`` event.
            context: Optional extra text added to the blackboard as a text
                source before the event is emitted.

        Returns:
            The blackboard of the controller created for this call.
        """
        controller = Controller()
        self.setup_controller_term_extraction(controller)
        self.setup_controller_definition_generation(controller)

        return await _emit_text(controller, text, context)

    async def extract_terminology(self, text: str, context: Optional[str] = None) -> Blackboard:
        """Run term extraction only (no definition sources) on ``text``.

        Args:
            text: Text carried by the emitted ``TextExtracted`` event.
            context: Optional extra text added to the blackboard as a text
                source before the event is emitted.

        Returns:
            The blackboard of the controller created for this call.
        """
        controller = Controller()
        self.setup_controller_term_extraction(controller)

        return await _emit_text(controller, text, context)


class SessionManager:
    """Class-level registry of sessions, keyed by ``Session.id``."""

    # Shared by every user of the class; never instantiated per manager.
    sessions: ClassVar[dict[UUID, Session]] = {}

    @staticmethod
    def setup_controller_llm(controller: Controller) -> None:
        """Register the OpenAI extractor and lemmatizer on ``controller``."""
        controller.register_knowledge_source(OpenAIExtractor)
        controller.register_knowledge_source(OpenAILemmatizer)
        # TODO: Occurrence Resolver
        # NOTE: CValue, OpenAIDefinitionGenerator and OpenAIDefinitionCombiner
        # are intentionally not registered here (they were disabled in the
        # original code).

    @classmethod
    def create_session(cls, policy: KnowledgeSourcePolicy) -> Session:
        """Create a session for ``policy``, store it in the registry, return it."""
        session = Session(policy=policy)
        cls.sessions[session.id] = session
        return session

    @classmethod
    def remove_session(cls, session_id: UUID):
        """Drop the session registered under ``session_id``.

        Raises:
            KeyError: If no session with that id is registered.
        """
        cls.sessions.pop(session_id)