about summary refs log tree commit diff stats
path: root/archive/2025/summer/bsc_gerg/src/terminology/session.py
blob: 534c69d6eb9d3c6cefe38b5b07d4cb6d67ffaaa4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import uuid
from typing import Annotated, Optional
from uuid import UUID

from pydantic import BaseModel, Field

from src.knowledge.document import Pdf2Text
from src.knowledge.extract import CValue
from src.knowledge.openai.definition.combiner import OpenAIDefinitionCombiner
from src.knowledge.openai.definition.generator import OpenAIDefinitionGenerator
from src.knowledge.openai.extract import OpenAIExtractor
from src.knowledge.openai.lemmatize import OpenAILemmatizer
from src.knowledge.resolver import CSVDefinitionResolver
from src.terminology.event import DocumentAdded, TextExtracted
from src.terminology.terminology import Controller, Blackboard


class KnowledgeSourcePolicy(BaseModel):
    use_llm: bool = False
    pass

class Session(BaseModel):
    id: Annotated[UUID, Field(default_factory=uuid.uuid4)]
    policy: KnowledgeSourcePolicy

    def setup_controller_document_processing(self, controller: Controller) -> Controller:
        controller.register_knowledge_source(Pdf2Text)
        return controller

    def setup_controller_term_extraction(self, controller: Controller) -> Controller:
        if self.policy.use_llm:
            controller.register_knowledge_source(OpenAIExtractor)
            controller.register_knowledge_source(OpenAILemmatizer)
        else:
            controller.register_knowledge_source(CValue)
        return controller

    def setup_controller_definition_generation(self, controller: Controller) -> Controller:
        controller.register_knowledge_source(CSVDefinitionResolver)
        if self.policy.use_llm:
            controller.register_knowledge_source(OpenAIDefinitionGenerator)
            controller.register_knowledge_source(OpenAIDefinitionCombiner)
        return controller


    async def process_document(self, file_path: str) -> Blackboard:
        controller = Controller()
        self.setup_controller_document_processing(controller)
        self.setup_controller_term_extraction(controller)
        self.setup_controller_definition_generation(controller)

        await controller.emit(DocumentAdded(path=file_path))

        return controller.blackboard


    async def retrieve_term_definition(self, text: str, context: Optional[str] = None) -> Blackboard:
        controller = Controller()
        self.setup_controller_term_extraction(controller)
        self.setup_controller_definition_generation(controller)

        # TODO: Make proper use of context!!!
        if context is not None:
            controller.blackboard.add_text_source(context)

        await controller.emit(TextExtracted(text=text))

        return controller.blackboard

    async def extract_terminology(self, text: str, context: Optional[str] = None) -> Blackboard:
        controller = Controller()
        self.setup_controller_term_extraction(controller)

        # TODO: Make proper use of context!!!
        if context is not None:
            controller.blackboard.add_text_source(context)

        await controller.emit(TextExtracted(text=text))

        return controller.blackboard

    model_config = {
        "arbitrary_types_allowed": True,
    }


class SessionManager:

    sessions = {}

    @staticmethod
    def setup_controller_llm(controller: Controller):
        controller.register_knowledge_source(OpenAIExtractor)
        # controller.register_knowledge_source(CValue)
        controller.register_knowledge_source(OpenAILemmatizer)
        # TODO: Occurrence Resolver
        # controller.register_knowledge_source(OpenAIDefinitionGenerator)
        # controller.register_knowledge_source(OpenAIDefinitionCombiner)

    @classmethod
    def create_session(cls, policy: KnowledgeSourcePolicy) -> Session:
        session = Session(policy=policy)
        cls.sessions[session.id] = session
        return session

    @classmethod
    def remove_session(cls, session_id: UUID):
        cls.sessions.pop(session_id)