about summary refs log tree commit diff stats
path: root/archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py
diff options
context:
space:
mode:
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py')
-rw-r--r-- archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py | 46
1 files changed, 46 insertions, 0 deletions
diff --git a/archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py b/archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py
new file mode 100644
index 000000000..0a7bfb7b9
--- /dev/null
+++ b/archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py
@@ -0,0 +1,46 @@
+from typing import AsyncIterable
+
+from src.terminology.event import TermExtracted, Event, TermNormalized
+from src.terminology.terminology import TermNormalizer
+
+DEVELOPER_PROMPT = """
+You are an expert in linguistics and languages.
+Your job is to transform words and phrases into a normalized and generalized form.
+You transform words and phrases into singular form.
+You do not replace words with other similar words.
+"""  # English instruction text: normalize to singular form, no substitution by similar words. Not referenced in the visible code.
+# Short German variant; roughly "bring the following term into a base form, keep the part of speech".
+DEVELOPER_PROMPT_SHORT: str = """
+Bringen den folgenden Begriff in eine Basisform. Behalte die Wortart.
+"""  # NOTE(review): "Bringen" looks like a typo for the imperative "Bringe"; runtime string, left unchanged - confirm.
+
+EXAMPLE_USER: list[str] = [  # example input terms; index-aligned with OUTPUT_ASSISTANT below
+    "örtlicher Zusatz",  # already singular; paired output is identical
+    "örtliche Zusätze",  # plural; paired output is the singular "örtlicher Zusatz"
+    "Betra",  # paired output is identical
+    "Aufgabe der Triebfahrzeugführerin",  # paired output changes only the trailing "-in" word
+    "Triebfahrzeugführerin",  # paired output is "Triebfahrzeugführer"
+    "Rangierbegleitender",  # paired output is "Rangierbegleiter"
+]
+# Expected normalized form for each EXAMPLE_USER entry, in the same order.
+OUTPUT_ASSISTANT = [
+    "örtlicher Zusatz",
+    "örtlicher Zusatz",
+    "Betra",
+    "Aufgabe der Triebfahrzeugführer",
+    "Triebfahrzeugführer",
+    "Rangierbegleiter",
+]
+# Flatten the pairs into alternating ("user", input), ("assistant", output) tuples: 6 pairs -> 12 entries.
+EXAMPLES = [message for input_term, output_term in zip(EXAMPLE_USER, OUTPUT_ASSISTANT) for message in
+                [("user", input_term), ("assistant", output_term)]]  # zip stops at the shorter list, so keep both lists the same length
+
+class LLMTermLemmatizer(TermNormalizer):  # TermNormalizer that fills term.normalization from get_llm_response()
+    # Flow: activate() awaits get_llm_response(term text), stores the result on the term, then yields one TermNormalized.
+    async def get_llm_response(self, term: str) -> str:  # hook: receives the raw term text passed in by activate()
+        pass  # NOTE(review): bare pass, so this returns None despite "-> str"; presumably overridden by a subclass - confirm
+
+    async def activate(self, event: TermExtracted) -> AsyncIterable[Event]:  # async generator; yields exactly one event per call
+        response = await self.get_llm_response(event.term.text)  # whatever the hook returns is used as-is, without validation
+        event.term.normalization = response  # mutates the term carried by the incoming event in place
+        yield TermNormalized(term=event.term)  # the same term object is passed on, not a copy
\ No newline at end of file