From 3e5d3ca82193e8e8561beb9ceac9982f376d84e2 Mon Sep 17 00:00:00 2001 From: Jonas Gerg Date: Tue, 9 Sep 2025 20:06:52 +0200 Subject: Add bsc_gerg --- archive/2025/summer/bsc_gerg/experiments.ipynb | 233 +++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 archive/2025/summer/bsc_gerg/experiments.ipynb (limited to 'archive/2025/summer/bsc_gerg/experiments.ipynb') diff --git a/archive/2025/summer/bsc_gerg/experiments.ipynb b/archive/2025/summer/bsc_gerg/experiments.ipynb new file mode 100644 index 000000000..1bd3669ca --- /dev/null +++ b/archive/2025/summer/bsc_gerg/experiments.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": "# Similarity Comparison for Tests", + "id": "b6d4f9b05f293ec8" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Fuzzy string matching", + "id": "87c701ae2678e5db" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "# TODO", + "id": "23c3f92212402a87" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Sentence Embeddings", + "id": "275e303877d17c08" + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-06-30T09:41:56.203596Z", + "start_time": "2025-06-30T09:41:54.449778Z" + } + }, + "source": [ + "\n", + "from sentence_transformers import SentenceTransformer\n", + "\n", + "# The \"all-MiniLM-L6-v2\" model is used for demonstration\n", + "model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n", + "\n", + "expected = [\"Rangieren\", \"420\", \"Gleis\", \"Abstellgleis\", \"auf Sicht fahren\", \"Signal\"]\n", + "actual = [\"Rangieren\", \"420er\", \"Gleis\", \"Abstellgleis\", \"auf Sicht fahren\", \"Signal\"]\n", + "\n", + "sentences = expected + actual\n", + "\n", + "embeddings = model.encode(sentences)\n", + "\n", + "similarities = model.similarity(embeddings, embeddings)\n", + "for i in range(len(expected)):\n", + " print(f\"Expected: {expected[i]}, Actual: {actual[i]}, Similarity: {similarities[i][i+len(expected)]}\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Expected: Rangieren, Actual: Rangieren, Similarity: 1.0\n", + "Expected: 420, Actual: 420er, Similarity: 0.8273268938064575\n", + "Expected: Gleis, Actual: Gleis, Similarity: 1.0000003576278687\n", + "Expected: Abstellgleis, Actual: Abstellgleis, Similarity: 1.000000238418579\n", + "Expected: auf Sicht fahren, Actual: auf Sicht fahren, Similarity: 1.0\n", + "Expected: Signal, Actual: Signal, Similarity: 1.0\n" + ] + } + ], + "execution_count": 3 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Assesment using LLMs", + "id": "dff194fdcd03c59c" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-30T13:07:07.597774Z", + "start_time": "2025-06-30T13:07:07.582024Z" + } + }, + "cell_type": "code", + "source": [ + "import random\n", + "import numpy as np\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "\n", + "client = OpenAI()\n", + "input_text = \"Rangiere mir bitte mal den 420er von Gleis 3 auf das Abstellgleis. Passt auf, du musst auf Sicht bis zu den Signalen fahren.\"\n", + "expected_terms = [\"Rangieren\", \"420\", \"Abstellgleis\", \"auf Sicht fahren\", \"Signal\"]\n", + "actual_terms = [\"Rangieren\", \"420er\", \"Abstellgleis\", \"auf Sicht fahren\", \"Signal\"]\n", + "\n", + "def run_test(shuffle=False):\n", + " expected = expected_terms.copy()\n", + " actual = actual_terms.copy()\n", + " if shuffle:\n", + " random.shuffle(expected)\n", + " random.shuffle(actual)\n", + " print(f\"Expected: {expected}, Actual: {actual}\")\n", + " response = client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " response_format={\"type\": \"text\"},\n", + " temperature=0,\n", + " top_p=0,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Bewerte die Ähnlichkeit der Ergebnisse der Term Extraktion. Gegeben ist ein Ausgangstext, \"\n", + " \"aus dem Fachbegriffe extrahiert werden mussten. Der Text ist gegeben. Darunter stehen die erwarteten Begriffe, \"\n", + " \"die extrahiert werden sollten. Zum Schluss stehen die tatsächlich extrahierten Begriffe. \"\n", + " \"Bewerte die Ähnlichkeit der extrahierten Begriffe. \"\n", + " \"Ignoriere die Reihenfolge der Begriffe.\"\n", + " \"Sobald sich ein Begriff grundlegend unterscheidet, beende sofort mit FALSE.\"\n", + " \"Wenn ein erwarteter Begriff gänzlich fehlt, beende sofort mit FALSE.\"\n", + " \"Wenn ein Begriff extrahiert wurde, der sicher kein Fachbegriff ist, beende sofort mit FALSE.\"\n", + " \"Ansonsten Ende sofort mit TRUE.\"\n", + " \"Bewerte die extrahierten Begriffe.\"\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"\"\"{input_text}\\n\\nErwartete Begriffe: {\", \".join(expected)}\\n\\nTatsächliche Begriffe: {\", \".join(actual)}\"\"\"\n", + " }\n", + " ],\n", + " logprobs=True,\n", + " seed=42,\n", + " store=False,\n", + " top_logprobs=5,\n", + " frequency_penalty=0,\n", + " presence_penalty=0,\n", + " )\n", + " result, logprobs = response.choices[0].message.content, response.choices[0].logprobs\n", + " # Get probabilities for TRUE and FALSE as the last token\n", + " probs = {token.token: float(np.exp(token.logprob)) for token in logprobs.content[0].top_logprobs if token.token == \"TRUE\" or token.token == \"FALSE\"}\n", + " # Normalize the probabilities to only account for TRUE and FALSE\n", + " # This might actually distort the result, as other, maybe more likely tokens are ignored (however, such results can be considered faulty)\n", + " total_end = sum(probs.values())\n", + " normalized_probs = {token: value / total_end for token, value in probs.items()}\n", + " print(f\"Probability for success of test {normalized_probs['TRUE']}\")" + ], + "id": "6924f5f7741325d0", + "outputs": [], + "execution_count": 49 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-30T13:07:13.049204Z", + "start_time": "2025-06-30T13:07:09.535572Z" + } + }, + "cell_type": "code", + "source": [ + "for i in range(5):\n", + " run_test(shuffle=False)" + ], + "id": "235b5091f0936d3e", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability for success of test 0.2942149597859341\n", + "Probability for success of test 0.46879062662624377\n", + "Probability for success of test 0.2942149597859341\n", + "Probability for success of test 0.26894142136999516\n", + "Probability for success of test 0.24508501864634824\n" + ] + } + ], + "execution_count": 50 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-30T13:07:16.452505Z", + "start_time": "2025-06-30T13:07:14.379517Z" + } + }, + "cell_type": "code", + "source": [ + "for i in range(5):\n", + " run_test(shuffle=True)" + ], + "id": "6e5159dd554e4469", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Expected: ['420', 'auf Sicht fahren', 'Rangieren', 'Abstellgleis', 'Signal'], Actual: ['Abstellgleis', 'Rangieren', 'Signal', '420er', 'auf Sicht fahren']\n", + "Probability for success of test 0.2689414096510109\n", + "Expected: ['Rangieren', 'auf Sicht fahren', '420', 'Signal', 'Abstellgleis'], Actual: ['Rangieren', '420er', 'auf Sicht fahren', 'Abstellgleis', 'Signal']\n", + "Probability for success of test 0.053403330553099\n", + "Expected: ['Rangieren', 'auf Sicht fahren', 'Signal', '420', 'Abstellgleis'], Actual: ['Signal', '420er', 'Abstellgleis', 'Rangieren', 'auf Sicht fahren']\n", + "Probability for success of test 0.9820137910906878\n", + "Expected: ['Abstellgleis', '420', 'auf Sicht fahren', 'Signal', 'Rangieren'], Actual: ['420er', 'auf Sicht fahren', 'Signal', 'Abstellgleis', 'Rangieren']\n", + "Probability for success of test 0.8807970779778824\n", + "Expected: ['Rangieren', 'Signal', '420', 'auf Sicht fahren', 'Abstellgleis'], Actual: ['Rangieren', 'auf Sicht fahren', 'Signal', '420er', 'Abstellgleis']\n", + "Probability for success of test 0.09534946618445304\n" + ] + } + ], + "execution_count": 51 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- cgit 1.4.1