diff options
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py')
| -rw-r--r-- | archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py | 46 |
1 files changed, 46 insertions, 0 deletions
"""Prompt material and a base class for LLM-driven term lemmatization."""

from typing import AsyncIterable

from src.terminology.event import TermExtracted, Event, TermNormalized
from src.terminology.terminology import TermNormalizer

# English instruction text describing the normalization task.
# Not referenced elsewhere in this module; presumably consumed by a concrete
# LLM client -- TODO confirm.
DEVELOPER_PROMPT: str = """
You are an expert in linguistics and languages.
Your job is to transform words and phrases into a normalized and generalized form.
You transform words and phrases into singular form.
You do not replace words with other similar words.
"""

# Short German variant of the instruction text.
# NOTE(review): "Bringen" looks like a typo for the imperative "Bringe";
# left untouched here because this is runtime text -- confirm before changing.
DEVELOPER_PROMPT_SHORT: str = """
Bringen den folgenden Begriff in eine Basisform. Behalte die Wortart.
"""

# Few-shot inputs; each entry pairs by position with OUTPUT_ASSISTANT.
EXAMPLE_USER: list[str] = [
    "örtlicher Zusatz",
    "örtliche Zusätze",
    "Betra",
    "Aufgabe der Triebfahrzeugführerin",
    "Triebfahrzeugführerin",
    "Rangierbegleitender",
]

# Expected normalized form for the entry at the same index in EXAMPLE_USER.
OUTPUT_ASSISTANT: list[str] = [
    "örtlicher Zusatz",
    "örtlicher Zusatz",
    "Betra",
    "Aufgabe der Triebfahrzeugführer",
    "Triebfahrzeugführer",
    "Rangierbegleiter",
]

# Flat, alternating sequence of ("user", input) / ("assistant", output)
# tuples built from the two lists above. zip() stops at the shorter list,
# so the lists must be kept the same length.
EXAMPLES: list[tuple[str, str]] = [
    turn
    for user_text, assistant_text in zip(EXAMPLE_USER, OUTPUT_ASSISTANT)
    for turn in (("user", user_text), ("assistant", assistant_text))
]


class LLMTermLemmatizer(TermNormalizer):
    """Term normalizer that obtains the normalized form from `get_llm_response`."""

    async def get_llm_response(self, term: str) -> str:
        """Return the normalized form of ``term``.

        The body is intentionally empty here, so calling it on this class
        yields ``None`` despite the ``str`` annotation.
        NOTE(review): presumably an override hook for concrete LLM
        backends -- confirm against subclasses.
        """

    async def activate(self, event: TermExtracted) -> AsyncIterable[Event]:
        """Normalize the extracted term and emit a ``TermNormalized`` event.

        The text of ``event.term`` is passed to `get_llm_response`; the
        result is stored on ``event.term.normalization`` (the term object is
        mutated in place) and a single ``TermNormalized`` carrying that same
        term is yielded.
        """
        lemma = await self.get_llm_response(event.term.text)
        event.term.normalization = lemma
        yield TermNormalized(term=event.term)