{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-07-04T17:20:42.362428Z",
     "start_time": "2025-07-04T17:20:42.350473Z"
    }
   },
   "source": [
    "import json\n",
    "\n",
    "import numpy as np\n",
    "from tabulate import tabulate\n",
    "\n",
    "# Evaluation results file; the relative path is resolved against the kernel's working directory.\n",
    "RESULTS_PATH = \"./../data/test_performance_output.json\"\n",
    "\n",
    "# Column order shared by the per-test table and the averages table.\n",
    "HEADERS = [\n",
    "    \"Recall (Strict)\",\n",
    "    \"Recall\",\n",
    "    \"Precision\",\n",
    "    \"Gen. Def. Validity\",\n",
    "]\n",
    "\n",
    "\n",
    "def _safe_ratio(numerator, denominator):\n",
    "    \"\"\"Return numerator / denominator, or 1 when denominator is 0.\n",
    "\n",
    "    The zero case keeps tests with nothing expected (or nothing generated)\n",
    "    from raising ZeroDivisionError; they are scored as 1 instead.\n",
    "    \"\"\"\n",
    "    return 1 if denominator == 0 else numerator / denominator\n",
    "\n",
    "\n",
    "def _score_test(test):\n",
    "    \"\"\"Return one table row for a test record, in HEADERS order.\n",
    "\n",
    "    Reads test[\"expected\"], test[\"result\"][\"terms\"] (each term with a sized\n",
    "    \"definitions\" collection) and the counters \"trueExpected\",\n",
    "    \"allowedExpected\", \"falseObserved\" and \"falseDefinitions\".\n",
    "    \"\"\"\n",
    "    terms = test[\"result\"][\"terms\"]\n",
    "    num_expected = len(test[\"expected\"])\n",
    "    num_terms = len(terms)\n",
    "    num_definitions = sum(len(term[\"definitions\"]) for term in terms)\n",
    "\n",
    "    return [\n",
    "        _safe_ratio(test[\"trueExpected\"], num_expected),\n",
    "        _safe_ratio(test[\"allowedExpected\"], num_expected),\n",
    "        _safe_ratio(num_terms - test[\"falseObserved\"], num_terms),\n",
    "        _safe_ratio(num_definitions - test[\"falseDefinitions\"], num_definitions),\n",
    "    ]\n",
    "\n",
    "\n",
    "# Explicit encoding: the platform default is not UTF-8 everywhere.\n",
    "with open(RESULTS_PATH, \"r\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "print(f\"Num test results {len(data)}\")\n",
    "\n",
    "table = [_score_test(test) for test in data]\n",
    "print(tabulate(table, headers=HEADERS))\n",
    "\n",
    "print(\"\\n#### Averages ####\\n\")\n",
    "\n",
    "if table:\n",
    "    # Unweighted mean of each column: every test counts equally, whatever its size.\n",
    "    print(tabulate([np.average(np.array(table), axis=0)], headers=HEADERS))\n",
    "else:\n",
    "    # Nothing to average; skip rather than average an empty array.\n",
    "    print(\"No test results to average.\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num test results 21\n",
      " Recall (Strict) Recall Precision Gen. Def. Validity\n",
      "----------------- -------- ----------- --------------------\n",
      " 0.666667 1 1 1\n",
      " 1 1 0.8 0.25\n",
      " 0.666667 1 0.75 1\n",
      " 1 1 0.6 1\n",
      " 1 1 0.8 0.333333\n",
      " 0.714286 0.857143 1 0.5\n",
      " 0.8 1 1 0.5\n",
      " 1 1 0 1\n",
      " 1 1 1 1\n",
      " 1 1 0.6 1\n",
      " 1 1 0.428571 1\n",
      " 1 1 1 1\n",
      " 1 1 1 1\n",
      " 1 1 1 0\n",
      " 0.8 1 1 1\n",
      " 1 1 0.666667 1\n",
      " 0.75 0.75 0.666667 1\n",
      " 1 1 0.333333 1\n",
      " 1 1 1 1\n",
      " 1 1 1 1\n",
      " 0.8 1 0.833333 1\n",
      "\n",
      "#### Averages ####\n",
      "\n",
      " Recall (Strict) Recall Precision Gen. Def. Validity\n",
      "----------------- -------- ----------- --------------------\n",
      " 0.914172 0.981293 0.784694 0.837302\n"
     ]
    }
   ],
   "execution_count": 63
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}