about summary refs log tree commit diff stats
path: root/archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py
blob: 0a7bfb7b9797fd02eb87facf5006b768c0d8a2f0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from typing import AsyncIterable

from src.terminology.event import TermExtracted, Event, TermNormalized
from src.terminology.terminology import TermNormalizer

# English instruction text describing the normalization task: bring words and
# phrases into a normalized, singular form without substituting synonyms.
# Not referenced elsewhere in the visible part of this file; presumably passed
# as the developer/system message by concrete lemmatizer implementations --
# TODO confirm against the callers.
# The literal starts and ends with a newline; that whitespace is part of the
# prompt as written.
DEVELOPER_PROMPT: str = """
You are an expert in linguistics and languages.
Your job is to transform words and phrases into a normalized and generalized form.
You transform words and phrases into singular form.
You do not replace words with other similar words.
"""

# Short German variant of the instruction. Rough English meaning:
# "Bring the following term into a base form. Keep the part of speech."
# Not referenced elsewhere in the visible part of this file.
# NOTE(review): "Bringen den folgenden Begriff" looks like a grammar slip for
# the imperative "Bringe ..." (or "Bringen Sie ..."). The literal is left
# untouched here because changing it changes the text handed to the model --
# confirm before editing.
DEVELOPER_PROMPT_SHORT: str = """
Bringen den folgenden Begriff in eine Basisform. Behalte die Wortart.
"""

# Few-shot inputs (German terms). Each entry is paired by position with the
# entry at the same index in OUTPUT_ASSISTANT.
EXAMPLE_USER: list[str] = [
    "örtlicher Zusatz",
    "örtliche Zusätze",
    "Betra",
    "Aufgabe der Triebfahrzeugführerin",
    "Triebfahrzeugführerin",
    "Rangierbegleitender",
]

# Expected normalization for each input above: plural forms are mapped to the
# singular, "-in" forms to the form without that suffix, and terms that are
# already in base form (e.g. "Betra") are returned unchanged.
OUTPUT_ASSISTANT: list[str] = [
    "örtlicher Zusatz",
    "örtlicher Zusatz",
    "Betra",
    "Aufgabe der Triebfahrzeugführer",
    "Triebfahrzeugführer",
    "Rangierbegleiter",
]

# Flat list of (role, text) tuples that alternates "user" and "assistant"
# entries, one such pair per example, in the order the examples are listed.
# zip() stops at the shorter list, so both lists must stay the same length.
EXAMPLES: list[tuple[str, str]] = [
    (role, text)
    for example_pair in zip(EXAMPLE_USER, OUTPUT_ASSISTANT)
    for role, text in zip(("user", "assistant"), example_pair)
]

class LLMTermLemmatizer(TermNormalizer):
    """Term normalizer that stores the answer of an LLM call on the extracted term.

    The LLM call is isolated in :meth:`get_llm_response`. In this class that
    method has an empty body and therefore returns ``None``; concrete
    subclasses are presumably expected to override it -- confirm against the
    implementations.
    """

    async def get_llm_response(self, term: str) -> str:
        """Return the normalized form of ``term``.

        The implementation in this class does nothing and yields ``None``.
        """

    async def activate(self, event: TermExtracted) -> AsyncIterable[Event]:
        """Normalize the term carried by ``event`` and emit one follow-up event.

        The text of ``event.term`` is passed to :meth:`get_llm_response`; the
        awaited result is written to ``event.term.normalization`` (the term
        object is modified in place), and a single ``TermNormalized`` event
        constructed with ``term=event.term`` is then yielded.
        """
        normalized_text = await self.get_llm_response(event.term.text)
        event.term.normalization = normalized_text
        yield TermNormalized(term=event.term)