diff options
| author | Martin Fink <martin@finkmartin.com> | 2025-09-11 09:19:48 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-11 09:19:48 +0200 |
| commit | 17af5f6fc0538f615b8612dcd2cb77c2affad63f (patch) | |
| tree | 76e4c260123b68b93da2417482024ba11f9838ee /archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py | |
| parent | a910d0a3e57f4de47cf2387ac239ae8d0eaca507 (diff) | |
| parent | 3e5d3ca82193e8e8561beb9ceac9982f376d84e2 (diff) | |
| download | research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.tar.gz research-work-archive-artifacts-17af5f6fc0538f615b8612dcd2cb77c2affad63f.zip | |
Merge pull request #10 from walamana/main
Add bsc_gerg
Diffstat (limited to 'archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py')
| -rw-r--r-- | archive/2025/summer/bsc_gerg/src/knowledge/llm/lemmatize.py | 46 |
1 files changed, 46 insertions, 0 deletions
from typing import AsyncIterable

from src.terminology.event import TermExtracted, Event, TermNormalized
from src.terminology.terminology import TermNormalizer

# English instruction text describing the normalization task: reduce words
# and phrases to a singular, generalized form without substituting synonyms.
DEVELOPER_PROMPT = """
You are an expert in linguistics and languages.
Your job is to transform words and phrases into a normalized and generalized form.
You transform words and phrases into singular form.
You do not replace words with other similar words.
"""

# Short German variant of the instruction.
# NOTE(review): "Bringen" looks like a typo for "Bringe"; it is left untouched
# here because the text is a runtime string -- confirm before changing it.
DEVELOPER_PROMPT_SHORT: str = """
Bringen den folgenden Begriff in eine Basisform. Behalte die Wortart.
"""

# Few-shot inputs. Each entry is paired by position with the entry at the
# same index in OUTPUT_ASSISTANT, so the two lists must stay the same length.
EXAMPLE_USER: list[str] = [
    "örtlicher Zusatz",
    "örtliche Zusätze",
    "Betra",
    "Aufgabe der Triebfahrzeugführerin",
    "Triebfahrzeugführerin",
    "Rangierbegleitender",
]

# Expected normalized form for the EXAMPLE_USER entry at the same index.
OUTPUT_ASSISTANT = [
    "örtlicher Zusatz",
    "örtlicher Zusatz",
    "Betra",
    "Aufgabe der Triebfahrzeugführer",
    "Triebfahrzeugführer",
    "Rangierbegleiter",
]

# Flat list of (role, text) tuples that alternates ("user", input) and
# ("assistant", output) for every example pair, in list order.
EXAMPLES = [
    message
    for input_term, output_term in zip(EXAMPLE_USER, OUTPUT_ASSISTANT)
    for message in (("user", input_term), ("assistant", output_term))
]


class LLMTermLemmatizer(TermNormalizer):
    """Term normalizer whose normalized form comes from ``get_llm_response``.

    This class only wires the result of ``get_llm_response`` into the event
    flow; it contains no model call of its own, so a subclass has to
    override ``get_llm_response`` to supply one.
    """

    async def get_llm_response(self, term: str) -> str:
        """Return the normalized form of ``term``.

        Args:
            term: The raw term text to normalize.

        Returns:
            The normalized term text.

        Raises:
            NotImplementedError: Always, in this base class. Failing loudly
                keeps ``activate`` from storing ``None`` as a normalization
                and announcing it as a successful result.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must override get_llm_response()"
        )

    async def activate(self, event: TermExtracted) -> AsyncIterable[Event]:
        """Normalize the term carried by ``event`` and announce the result.

        Args:
            event: Event whose ``term.text`` is passed to
                ``get_llm_response``.

        Yields:
            A single ``TermNormalized`` event wrapping the same term object,
            after its ``normalization`` attribute has been set in place.
        """
        normalized = await self.get_llm_response(event.term.text)
        # The term object is mutated in place; the emitted event references
        # the same instance rather than a copy.
        event.term.normalization = normalized
        yield TermNormalized(term=event.term)