about summary refs log tree commit diff stats
path: root/archive/2025/summer/bsc_gerg/src/terminology/session.py
diff options
context:
space:
mode:
author Martin Fink <martin@finkmartin.com> 2025-09-11 09:19:48 +0200
committer GitHub <noreply@github.com> 2025-09-11 09:19:48 +0200
commit 17af5f6fc0538f615b8612dcd2cb77c2affad63f (patch)
tree 76e4c260123b68b93da2417482024ba11f9838ee /archive/2025/summer/bsc_gerg/src/terminology/session.py
parent a910d0a3e57f4de47cf2387ac239ae8d0eaca507 (diff)
parent 3e5d3ca82193e8e8561beb9ceac9982f376d84e2 (diff)
download research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.tar.gz
research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.zip
Merge pull request #10 from walamana/main
Add bsc_gerg
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/terminology/session.py')
-rw-r--r-- archive/2025/summer/bsc_gerg/src/terminology/session.py 108
1 files changed, 108 insertions, 0 deletions
diff --git a/archive/2025/summer/bsc_gerg/src/terminology/session.py b/archive/2025/summer/bsc_gerg/src/terminology/session.py
new file mode 100644
index 000000000..534c69d6e
--- /dev/null
+++ b/archive/2025/summer/bsc_gerg/src/terminology/session.py
@@ -0,0 +1,108 @@
+import uuid
+from typing import Annotated, Optional
+from uuid import UUID
+
+from pydantic import BaseModel, Field
+
+from src.knowledge.document import Pdf2Text
+from src.knowledge.extract import CValue
+from src.knowledge.openai.definition.combiner import OpenAIDefinitionCombiner
+from src.knowledge.openai.definition.generator import OpenAIDefinitionGenerator
+from src.knowledge.openai.extract import OpenAIExtractor
+from src.knowledge.openai.lemmatize import OpenAILemmatizer
+from src.knowledge.resolver import CSVDefinitionResolver
+from src.terminology.event import DocumentAdded, TextExtracted
+from src.terminology.terminology import Controller, Blackboard
+
+
+class KnowledgeSourcePolicy(BaseModel):
+    use_llm: bool = False
+    pass
+
+class Session(BaseModel):
+    id: Annotated[UUID, Field(default_factory=uuid.uuid4)]
+    policy: KnowledgeSourcePolicy
+
+    def setup_controller_document_processing(self, controller: Controller) -> Controller:
+        controller.register_knowledge_source(Pdf2Text)
+        return controller
+
+    def setup_controller_term_extraction(self, controller: Controller) -> Controller:
+        if self.policy.use_llm:
+            controller.register_knowledge_source(OpenAIExtractor)
+            controller.register_knowledge_source(OpenAILemmatizer)
+        else:
+            controller.register_knowledge_source(CValue)
+        return controller
+
+    def setup_controller_definition_generation(self, controller: Controller) -> Controller:
+        controller.register_knowledge_source(CSVDefinitionResolver)
+        if self.policy.use_llm:
+            controller.register_knowledge_source(OpenAIDefinitionGenerator)
+            controller.register_knowledge_source(OpenAIDefinitionCombiner)
+        return controller
+
+
+    async def process_document(self, file_path: str) -> Blackboard:
+        controller = Controller()
+        self.setup_controller_document_processing(controller)
+        self.setup_controller_term_extraction(controller)
+        self.setup_controller_definition_generation(controller)
+
+        await controller.emit(DocumentAdded(path=file_path))
+
+        return controller.blackboard
+
+
+    async def retrieve_term_definition(self, text: str, context: Optional[str] = None) -> Blackboard:
+        controller = Controller()
+        self.setup_controller_term_extraction(controller)
+        self.setup_controller_definition_generation(controller)
+
+        # TODO: Make proper use of context!!!
+        if context is not None:
+            controller.blackboard.add_text_source(context)
+
+        await controller.emit(TextExtracted(text=text))
+
+        return controller.blackboard
+
+    async def extract_terminology(self, text: str, context: Optional[str] = None) -> Blackboard:
+        controller = Controller()
+        self.setup_controller_term_extraction(controller)
+
+        # TODO: Make proper use of context!!!
+        if context is not None:
+            controller.blackboard.add_text_source(context)
+
+        await controller.emit(TextExtracted(text=text))
+
+        return controller.blackboard
+
+    model_config = {
+        "arbitrary_types_allowed": True,
+    }
+
+
+class SessionManager:
+
+    sessions = {}
+
+    @staticmethod
+    def setup_controller_llm(controller: Controller):
+        controller.register_knowledge_source(OpenAIExtractor)
+        # controller.register_knowledge_source(CValue)
+        controller.register_knowledge_source(OpenAILemmatizer)
+        # TODO: Occurrence Resolver
+        # controller.register_knowledge_source(OpenAIDefinitionGenerator)
+        # controller.register_knowledge_source(OpenAIDefinitionCombiner)
+
+    @classmethod
+    def create_session(cls, policy: KnowledgeSourcePolicy) -> Session:
+        session = Session(policy=policy)
+        cls.sessions[session.id] = session
+        return session
+
+    @classmethod
+    def remove_session(cls, session_id: UUID):
+        cls.sessions.pop(session_id)