| author | Steve Bambou <99360731+FullbusterSteve@users.noreply.github.com> | 2025-09-17 13:32:40 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-17 13:32:40 +0200 |
| commit | ad15cd7496c75b0d6440145ffd8f28c20c366093 (patch) | |
| tree | 875ac7a9d578d45b61289b9c16b6cd6d038a6ef5 /archive/2025/summer | |
| parent | fa80a2aca83ba56daa24cbfa737d0c40df8950ee (diff) | |
| download | research-work-archive-artifacts-ad15cd7496c75b0d6440145ffd8f28c20c366093.tar.gz research-work-archive-artifacts-ad15cd7496c75b0d6440145ffd8f28c20c366093.zip | |
add bsc_bambou to archive-artifacts (#11)
* add bsc_bambou
* add description for bsc_bambou
* add plotting functions
Diffstat (limited to 'archive/2025/summer')
| -rw-r--r-- | archive/2025/summer/bsc_bambou/README.md | 34 |
| -rw-r--r-- | archive/2025/summer/bsc_bambou/evaluation_utils.ipynb | 784 |

2 files changed, 818 insertions(+), 0 deletions(-)
diff --git a/archive/2025/summer/bsc_bambou/README.md b/archive/2025/summer/bsc_bambou/README.md
new file mode 100644
index 000000000..a06ee73af
--- /dev/null
+++ b/archive/2025/summer/bsc_bambou/README.md
@@ -0,0 +1,34 @@
+# Artifact Archive: BSc Thesis Steve Bambou
+
+This folder contains the research artifacts for my Bachelor thesis at [TUM DSE](https://dse.in.tum.de/).
+
+## Code Base
+
+The main source code for my thesis is available in the following repository:
+
+- [Code Repository](https://github.com/dhschall/gem5-fdp/tree/pf-rework-multi-pred)
+
+This repository includes:
+- Source code
+- Build system files
+
+## Evaluation Code
+
+The evaluation scripts and analysis tools are maintained separately in:
+
+- [Evaluation Repository](https://github.com/dhschall/gem5-svr-bench/tree/multi-pred)
+
+This repository contains:
+- Gem5 configuration scripts
+- Benchmark configuration scripts
+- Data processing scripts
+
+
+## How to Reproduce Results
+
+1. Clone the code base repository.
+2. Clone the evaluation repository inside the code base repository.
+3. Follow the instructions in each repository's `README.md` to build and run the code.
+4. Use the evaluation scripts to run the benchmarks.
+5. Use the data collection script to aggregate the benchmark results into one .csv file each for the SPEC and non-SPEC workloads.
+6. Use the [evaluation notebook](evaluation_utils.ipynb) to generate plots; a usage sketch follows below.
\ No newline at end of file
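To make steps 5 and 6 concrete, here is a minimal sketch of how the aggregated results could be fed to the plotting utilities. The .csv file name, core index, and set tag are hypothetical placeholders, and the helper functions only exist once the notebook's cell (below) has been run:

    spec_df = loadAndPrepare(['spec_results.csv'], cores=['0'], experiment_marker=['SPEC'])
    spec_df = removeWarmup(spec_df)          # drop the first periodic stat dumps of each run
    barPlotY(spec_df, 'IPC', label='IPC')    # mean IPC per benchmark per experiment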
" f'board.processor.cores{cores}.core.rename.fullRegistersEvents' : 'register_full',\n", + " f'board.processor.cores{cores}.core.fetchStats0.icacheStallCycles' : 'icache_stall_cycles',\n", + " f'board.processor.cores{cores}.core.iew.memOrderViolationEvents' : 'memOrderViolations',\n", + " f'board.processor.cores{cores}.core.TopDownL2_BackendBound.serializeStalls' : 'TopDownL2_serializingStalls',\n", + " f'board.processor.cores{cores}.core.TopDownL2_BackendBound.memoryBound' : 'TopDownL2_memoryBound',\n", + " f'board.processor.cores{cores}.core.TopDownL2_BackendBound.coreBound' : 'TopDownL2_coreBound',\n", + " f'board.processor.cores{cores}.core.TopDownL2_BadSpeculation.machineClears' : 'TopDownL2_machineClears',\n", + " f'board.processor.cores{cores}.core.TopDownL2_BadSpeculation.branchMissPredicts' : 'TopDownL2_Mispredicts',\n", + " f'board.processor.cores{cores}.core.issueRate' : 'issueRate',\n", + " f'board.processor.cores{cores}.core.fetchStats0.fetchRate' : 'fetchRate',\n", + " f'board.processor.cores{cores}.core.commitStats0.numInsts' : 'insts',\n", + " f'board.processor.cores{cores}.core.cpi' : 'cpi',\n", + " f'board.processor.cores{cores}.core.idleCycles' : 'idleCycles',\n", + " f'board.processor.cores{cores}.core.numCycles' : 'numCycles', \n", + " f'board.processor.cores{cores}.core.robOccupancy::mean' : 'ROB mean occupancy',\n", + " f'board.cache_hierarchy.l1icaches{cores}.demandMshrMisses::total' : 'l1icacheMisses',\n", + " f'board.cache_hierarchy.l1dcaches{cores}.demandMshrMisses::total' : 'l1dcacheMisses',\n", + " f'board.processor.cores{cores}.core.bac.ftSizeDist::mean' : 'mean_ftSize',\n", + " f'board.processor.cores{cores}.core.bac.ftNumber::mean' : 'mean_ftNumber',\n", + " f'board.processor.cores{cores}.core.numIssuedDist::mean' : 'mean_issuedInsts',\n", + " f'board.processor.cores{cores}.core.iew.dispInstDist::mean' : 'mean_dispatchedInsts',\n", + " f'board.processor.cores{cores}.core.iew.wbRate' : 'wbRate',\n", + " f'board.processor.cores{cores}.core.ftq.occupancy::mean' : 'Mean FTQ Occupancy',\n", + " f'board.processor.cores{cores}.core.instSquashedAexperimenttIssueDist::mean' : 'mean_squashedInstsAtIssue',\n", + " f'board.processor.cores{cores}.core.lsq0.blockedByCache' : 'lsqBlockebByCache',\n", + " f'board.processor.cores{cores}.core.rob.robSquashCycles' : 'robSquashCycles',\n", + " f'board.processor.cores{cores}.core.rob.independentInstDelta::mean' : 'independentInstDelta',\n", + " f'board.processor.cores{cores}.core.rob.independentInst' : 'independentInst'\n", + " }\n", + "\n", + "\n", + "\n", + "def loadAndPrepare(fileNames, cores=[], experiment_marker = []):\n", + " dfs = []\n", + " for idx, fileName in enumerate(fileNames):\n", + " tmp_df = pd.read_csv(fileName).fillna(0)\n", + " if len(experiment_marker) > idx:\n", + " tmp_df['set'] = experiment_marker[idx] \n", + " tmp_df.rename(columns=column_mapping(cores[idx] if len (cores) > 0 else \"\"), inplace=True) \n", + " dfs.append(tmp_df)\n", + " df = pd.concat(dfs , ignore_index=True)\n", + " \n", + " \n", + " #df.drop(columns=[col for col in df.columns if not (col.startswith('board.processor.cores1.core') or col in column_mapping.values())], inplace=True)\n", + "\n", + " new_cols = {\n", + " 'mpki': df['total_mispredicts'] * 1000 / df['insts'],\n", + " 'btb_mpki': df['btb_mispredicts'] * 1000 / df['insts'],\n", + " 'cond_mpki': df['cond_mispredicts'] * 1000 / df['insts'],\n", + " 'bp_mpki': df['bp_mispredicts'] * 1000 / df['insts'],\n", + " 'icache_mpki': df['l1icacheMisses'] * 1000 / df['insts'],\n", + " 'IPC': 
def filterBenchExperiment(df, experiment_like, exclude_benchs=[], only_benchs=[]):
    """Restrict df to the requested benchmarks and to experiments matching a regex."""
    if len(only_benchs) > 0:
        df = df[df['benchmark'].isin(only_benchs)]
    else:
        df = df[~df['benchmark'].isin(exclude_benchs)]

    df = df[df['experiment'].str.contains(experiment_like, regex=True, na=False)]
    return df


def plotYoverTime(df, y, experiment_like='', exclude_benchs=[], only_benchs=[]):
    """One figure per benchmark: y over simulated time (finalTick), one line per experiment."""
    x = 'finalTick'

    df = filterBenchExperiment(df, experiment_like, exclude_benchs, only_benchs)

    for benchmark, group in df.groupby('benchmark'):
        plt.figure(figsize=(8, 5))
        plt.title(f"Benchmark: {benchmark}")

        # Group by experiment within the benchmark group
        for experiment, subgrp in group.groupby('experiment'):
            plt.plot(subgrp[x], subgrp[y], label=experiment)  # line
            plt.scatter(subgrp[x], subgrp[y])                 # scatter points

        plt.xlabel(x)
        plt.ylabel(y)
        plt.legend(title="Experiment")
        plt.grid(True)
        plt.tight_layout()
        plt.show()


def barPlotY_B(
    dfs,
    y,
    label=None,
    experiment_like='',
    exclude_benchs=[],
    only_benchs=[],
    save_as="",
    ylim=None,
    legend_out=False,
    show_means=False,  # mean bars across benchmarks
    ref_value=None     # reference value for percentage labels
):
    """Grouped bar plot of y per benchmark, split by result set (hatch) and ppc value (color)."""
    # --- If a single df is passed instead of a list, wrap it ---
    if isinstance(dfs, pd.DataFrame):
        dfs = [dfs]

    # --- Concatenate with explicit set ordering preserved ---
    df = pd.concat(dfs, ignore_index=True)

    # --- Maintain set order as provided ---
    ordered_sets = []
    for d in dfs:
        for s in d['set'].unique():
            if s not in ordered_sets:
                ordered_sets.append(s)

    df = filterBenchExperiment(df, experiment_like, exclude_benchs, only_benchs)

    # --- Group by benchmark, set, ppc ---
    grouped = df.groupby(['benchmark', 'set', 'ppc'])[y].mean().reset_index()

    # --- Optionally add a "Mean" row across benchmarks ---
    if show_means:
        mean_rows = (
            grouped.groupby(['set', 'ppc'])[y]
            .mean()
            .reset_index()
            .assign(benchmark="Mean")
        )
        grouped = pd.concat([grouped, mean_rows], ignore_index=True)

    # --- Plot ---
    benchmarks = grouped['benchmark'].unique()
    sets = ordered_sets  # use preserved order
    ppcs = grouped['ppc'].unique()

    x = np.arange(len(benchmarks))
    width = 0.8 / len(sets)  # width per set group

    hatch_patterns = ["//", "...", "\\\\", "xx", "++"]  # extend if needed
    colors = sns.color_palette("tab10", len(ppcs))

    fig, ax = plt.subplots(figsize=(12, 6))

    for i, s in enumerate(sets):
        for j, p in enumerate(ppcs):
            vals = []
            for b in benchmarks:
                row = grouped[(grouped['benchmark'] == b) & (grouped['set'] == s) & (grouped['ppc'] == p)]
                vals.append(row[y].values[0] if not row.empty else 0)

            bars = ax.bar(
                x + i * width + j * (width / len(ppcs)),
                vals,
                width / len(ppcs),
                color=colors[j],
                hatch=hatch_patterns[i % len(hatch_patterns)],
                edgecolor="black"
            )

            # --- Add percentage labels if ref_value is given ---
            if ref_value is not None:
                for bar in bars:
                    height = bar.get_height()
                    if height > 0:
                        perc = (height / ref_value) * 100
                        ax.text(
                            bar.get_x() + bar.get_width() / 2,
                            height + (ylim * 0.01 if ylim else height * 0.01),
                            f"{perc:.1f}%",
                            ha="center", va="bottom", fontsize=8
                        )

    # --- Beautify ---
    ax.set_xticks(x + (len(sets) - 1) * width / 2)
    ax.set_xticklabels(benchmarks, rotation=45, ha="right")

    if ylim:
        plt.ylim(0, ylim)

    ax.set_title(f'{label if label else y} per Benchmark')
    ax.set_xlabel('Benchmark')
    ax.set_ylabel(f'{label if label else y}')

    # --- Single combined legend: colors encode ppc values, hatches encode sets ---
    color_patches = [mpatches.Patch(color=colors[j], label=f"{p}") for j, p in enumerate(ppcs)]
    hatch_patches = [mpatches.Patch(facecolor="white", hatch=hatch_patterns[i % len(hatch_patterns)],
                                    edgecolor="black", label=f"{s}") for i, s in enumerate(sets)]

    all_handles = color_patches + hatch_patches

    ax.legend(
        handles=all_handles,
        title="Legend",
        loc="best",
        bbox_to_anchor=(1.0, 0.5) if legend_out else None
    )

    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()

    if len(save_as) > 0:
        plt.savefig(f"bar_plots/{save_as}.png", bbox_inches='tight')

    plt.show()
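# --- Example usage of barPlotY_B (illustrative only; NOT part of the committed
# notebook). Synthetic data stands in for the aggregated gem5 results; hatches
# distinguish result sets, colors the ppc variants extracted by loadAndPrepare.
demo = pd.DataFrame({
    'benchmark':  ['gcc', 'gcc', 'x264', 'x264'] * 2,
    'experiment': ['fdp_ppc4', 'fdp_ppc8'] * 4,
    'set':        ['Baseline'] * 4 + ['FDP'] * 4,
    'ppc':        ['ppc4', 'ppc8'] * 4,
    'IPC':        [0.8, 0.9, 1.0, 1.1, 1.0, 1.1, 1.2, 1.3],
})
barPlotY_B(demo, 'IPC', show_means=True)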
def barPlotY(
    df,
    y,
    label=None,
    experiment_like='',
    exclude_benchs=[],
    only_benchs=[],
    save_as="",
    ylim=None,
    legend_out=False,
    show_means=False,  # flag for mean bars
    ref_value=None     # reference value for percentage labels
):
    """Bar plot of the mean of y per benchmark, one bar per experiment."""
    df = filterBenchExperiment(df, experiment_like, exclude_benchs, only_benchs)

    # Mean per benchmark per experiment
    grouped = df.groupby(['benchmark', 'experiment'])[f'{y}'].mean().unstack()

    # Optionally add a "Mean" row across benchmarks
    if show_means:
        means = grouped.mean(axis=0)
        grouped.loc["Mean"] = means

    # Step 2: Plot as bar chart
    ax = grouped.plot(kind='bar', figsize=(10, 6))

    # Step 3: Beautify
    if ylim:
        plt.ylim(0, ylim)
    plt.title(f'{label if label else y} per Benchmark per Experiment')
    plt.xlabel('Benchmark')
    plt.ylabel(f'{label if label else y}')
    plt.legend(
        title='Experiment',
        loc="best",
        bbox_to_anchor=(1.0, 0.5) if legend_out else None
    )
    plt.ylim(bottom=0)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()

    # Step 4: Add percentages if ref_value is given
    if ref_value is not None:
        for container in ax.containers:  # one container per experiment
            for bar in container:
                height = bar.get_height()
                if height > 0:
                    perc = (height / ref_value) * 100
                    ax.text(
                        bar.get_x() + bar.get_width() / 2,
                        height + (ylim * 0.01 if ylim else height * 0.01),  # offset a bit above
                        f"{perc:.1f}%",
                        ha="center", va="bottom", fontsize=8
                    )

    if len(save_as) > 0:
        plt.savefig(f"bar_plots/{save_as}.png", bbox_inches='tight')
    plt.show()
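# --- Example usage of barPlotY (illustrative only; NOT part of the committed
# notebook), reusing the synthetic `demo` frame from above. ref_value annotates
# each bar with its height relative to an arbitrary reference IPC of 1.0.
barPlotY(demo, 'IPC', label='IPC', show_means=True, ref_value=1.0)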
def plotMeanY(df, y, experiment_like='', exclude_benchs=[], only_benchs=[], save_as=""):
    """Bar plot of the mean of y per benchmark per experiment."""
    df = filterBenchExperiment(df, experiment_like, exclude_benchs, only_benchs)

    grouped = df.groupby(['benchmark', 'experiment'])[f'{y}'].mean().unstack()

    # Step 2: Plot as bar chart
    grouped.plot(kind='bar', figsize=(10, 6))

    # Step 3: Beautify
    plt.title(f'Mean of {y} per Benchmark per Experiment')
    plt.xlabel('Benchmark')
    plt.ylabel(f'Mean {y}')
    plt.legend(title='Experiment',
               bbox_to_anchor=(1.0, 0.5))
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    if len(save_as) > 0:
        plt.savefig(f"bar_plots/{save_as}.png", bbox_inches='tight')
    plt.show()


def plotYDensity(df, y, cores="", experiment_like='', exclude_benchs=[], only_benchs=[], label=None, save_as=""):
    """Weighted density plot of a bucketed gem5 distribution stat, one figure per benchmark."""
    variable_supported = {
        'robOccupancy': fr'board\.processor\.cores{cores}\.core\.robOccupancy::\d+(-\d+)?$',
        'ftNumber': fr'board\.processor\.cores{cores}\.core\.bac\.ftNumber::\d+(-\d+)?$',
        'BB_Size': fr'board\.processor\.cores{cores}\.core\.bac\.ftSizeDist::\d+(-\d+)?$',
        'issuedInsts': fr'board\.processor\.cores{cores}\.core\.numIssuedDist::\d+(-\d+)?$',
        'ftqOccupancy': fr'board\.processor\.cores{cores}\.core\.ftq\.occupancy::\d+(-\d+)?$'
    }

    if y not in variable_supported.keys():
        print("The variable entered is not yet supported!")
        return

    df = filterBenchExperiment(df, experiment_like, exclude_benchs, only_benchs)

    # Find all columns that represent the distribution buckets
    # (excluding 'total', 'mean', 'stddev', 'samples')
    bucket_cols = [
        col for col in df.columns
        if re.match(variable_supported[y], col)
    ]

    if not bucket_cols:
        print(f"No {y} bucket columns found.")
        return

    # Prepare data for plotting: keep only the last stat dump of each run
    df_plot = df[['benchmark', 'experiment', 'finalTick'] + bucket_cols].copy()
    df_plot = df_plot.loc[df_plot.groupby(['experiment', 'benchmark'])['finalTick'].idxmax()]

    # Melt for density plotting
    df_melted = df_plot.melt(
        id_vars=['benchmark', 'experiment'],
        value_vars=bucket_cols,
        var_name=f'{y} Bucket',
        value_name='Frequency'
    )

    # Extract bucket start as numeric value for plotting
    df_melted['Bucket Start'] = df_melted[f'{y} Bucket'].apply(lambda x: int(x.split('::')[-1].split('-')[0]))

    for benchmark, group_df in df_melted.groupby('benchmark'):
        # Plot
        plt.figure(figsize=(10, 6))
        sns.kdeplot(
            data=group_df,
            x='Bucket Start',
            weights='Frequency',
            hue='experiment',
            common_norm=False,
            fill=True,
            alpha=0.4,
            linewidth=1.5,
            cut=0,
            clip=(0, df_melted['Bucket Start'].max())
        )

        plt.title(f"{label if label else y} Density by Experiment, Benchmark: {benchmark}")
        plt.xlabel(f'{label if label else y} Bucket Start')
        plt.ylabel('Density (weighted)')
        plt.tight_layout()
        if len(save_as) > 0:
            # include the benchmark in the file name so the per-benchmark figures
            # do not overwrite each other
            plt.savefig(f"density_plots/{save_as}_{benchmark}.png", bbox_inches='tight')
        plt.show()
def plot_incremental_from_dfs(
    dfs, labels,
    benchmarks_col="benchmark", experiment_col="experiment", value_col="IPC",
    ylabel="Performance (IPC)", title=None, save_as="", ymax=None,
    show_means=False
):
    """
    Plot grouped stacked bars from multiple DataFrames, each representing
    a performance scenario. The DataFrames must have columns:
    [benchmark, experiment, IPC].

    Parameters:
        dfs (list[pd.DataFrame]): list of DataFrames in scenario order
        labels (list[str]): names of each scenario
    """
    experiments = dfs[0][experiment_col].unique()

    # --- aggregate each df ---
    grouped_dfs = []
    for df in dfs:
        grouped = df.groupby([benchmarks_col, experiment_col])[value_col].mean().reset_index()
        grouped_dfs.append(grouped)

    # --- merge into one wide table, one column per scenario ---
    merged = grouped_dfs[0].rename(columns={value_col: labels[0]})
    for i, df in enumerate(grouped_dfs[1:], 1):
        merged = merged.merge(df.rename(columns={value_col: labels[i]}),
                              on=[benchmarks_col, experiment_col])

    # --- optionally add a "Mean" benchmark ---
    if show_means:
        mean_rows = []
        for exp in experiments:
            sub = merged[merged[experiment_col] == exp]
            means = sub[labels].mean()
            row = {benchmarks_col: "Mean", experiment_col: exp}
            row.update(means.to_dict())
            mean_rows.append(row)
        merged = pd.concat([merged, pd.DataFrame(mean_rows)], ignore_index=True)

    benchmarks = merged[benchmarks_col].unique()
    x = np.arange(len(benchmarks))
    width = 0.75 / len(experiments)
    fig, ax = plt.subplots(figsize=(16, 7))

    # softer, more readable colors
    colors = ["#183ab8", "#bf8970", "#FF2400", "#ffd700", "#50C878", "#C0C0C0", "#7851a9", "#f77b07"]
    hatches = ["//", "\\\\", "xx", "oo", "..", "++"]

    # --- plot each experiment as a grouped stacked bar ---
    for i, exp in enumerate(experiments):
        sub = merged[merged[experiment_col] == exp]
        xpos = x - 0.375 + i * width + width / 2

        bottom = np.zeros(len(sub))
        prev_vals = bottom.copy()
        for j, lab in enumerate(labels):
            vals = sub[lab].to_numpy()
            inc = vals - prev_vals  # true difference to the previous scenario

            # only plot positive increments
            pos_mask = inc > 0
            if np.any(pos_mask):
                ax.bar(
                    xpos[pos_mask], inc[pos_mask], width,
                    bottom=prev_vals[pos_mask],
                    color=colors[j % len(colors)],
                    hatch=hatches[i % len(hatches)],
                    edgecolor="black"
                )

            prev_vals = vals  # always update (so the baseline stays correct)

    # --- formatting ---
    ax.set_ylabel(ylabel, fontsize=12)
    if title:
        ax.set_title(title, fontsize=14)
    ax.set_xticks(x)
    ax.set_xticklabels(benchmarks, rotation=45, ha="right")
    ax.yaxis.grid(True, which="both", linestyle="--", alpha=0.6)
    ax.margins(x=0.12)
    if ymax:
        ax.set_ylim(0, ymax)

    # --- build combined legend: colors encode scenarios, hatches encode experiments ---
    scenario_patches = [mpatches.Patch(color=colors[j], label=lab) for j, lab in enumerate(labels)]
    hatch_patches = [mpatches.Patch(facecolor="white", edgecolor="black", hatch=hatches[i], label=exp)
                     for i, exp in enumerate(experiments)]
    ax.legend(handles=scenario_patches + hatch_patches, title="Legend", ncol=2)

    if len(save_as) > 0:
        plt.savefig(f"IPC_plots/{save_as}.png", bbox_inches='tight')
    plt.tight_layout()
    plt.show()
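# --- Example usage of plot_incremental_from_dfs (illustrative only; NOT part of
# the committed notebook). Scenarios must be ordered from slowest to fastest so
# every increment is positive; here the second scenario is a synthetic +20% IPC
# derived from the `demo` frame above.
base = demo[['benchmark', 'experiment', 'IPC']]
plot_incremental_from_dfs([base, base.assign(IPC=base['IPC'] * 1.2)],
                          ['Baseline', '+FDP'], show_means=True)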
def plotMPKIStack(
    df,
    mean=False,
    experiment_like='',
    exclude_benchs=[],
    only_benchs=[],
    rm_components=[],
    save_as="",
    legend_out=False,
    show_means=False,          # add a "Mean" bar per experiment
    reverse_experiments=False  # invert experiment order
):
    """Stacked MPKI breakdown (L1I cache / BTB / branch predictor) per benchmark and experiment."""
    latest = df
    latest = filterBenchExperiment(latest, experiment_like, exclude_benchs, only_benchs)

    if mean:
        latest = latest.groupby(['experiment', 'benchmark'])[
            ['btb_mpki', 'cond_mpki', 'mpki', 'bp_mpki', 'icache_mpki']
        ].mean().reset_index(drop=False)
    else:
        latest = latest.loc[latest.groupby(['experiment', 'benchmark'])['finalTick'].idxmax()]

    component_cols = {
        "icache_mpki": "L1ICache",
        "btb_mpki": "BTB",
        "bp_mpki": "Branch Predictor"
    }

    for comp in rm_components:
        component_cols.pop(comp, None)

    # --- Optionally add a "Mean" row for each experiment ---
    if show_means:
        mean_rows = []
        for exp in latest['experiment'].unique():
            sub = latest[latest['experiment'] == exp]
            means = sub[list(component_cols.keys())].mean()
            row = {"benchmark": "Mean", "experiment": exp}
            row.update(means.to_dict())
            mean_rows.append(row)
        latest = pd.concat([latest, pd.DataFrame(mean_rows)], ignore_index=True)

    # Prepare axis categories; keep "Mean" as the last group
    latest['benchmark'] = latest['benchmark'].astype(str)
    benchmarks = list(latest['benchmark'].unique())
    if "Mean" in benchmarks:
        benchmarks = [b for b in benchmarks if b != "Mean"] + ["Mean"]

    experiments = sorted(latest['experiment'].unique())
    if reverse_experiments:  # invert order if requested
        experiments = experiments[::-1]

    # Define colors and hatches
    colors = {
        "L1ICache": "#14a31b",
        "BTB": "#3734eb",
        "Branch Predictor": "#eb5334"
    }
    hatches = ['', '*', 'o', '-', 'o+*', '-+.', '.', '+']

    # Plot
    fig, ax = plt.subplots(figsize=(16, 6))
    bar_width = 0.6 / len(experiments)
    x = np.arange(len(benchmarks))

    # Offset bars by experiment
    for i, exp in enumerate(experiments):
        subset = latest[latest['experiment'] == exp].set_index('benchmark')
        bottom = np.zeros(len(benchmarks))
        for col, name in component_cols.items():
            values = [subset.loc[b, col] if b in subset.index else 0 for b in benchmarks]
            bars = ax.bar(
                x + i * bar_width, values, bar_width,
                bottom=bottom,
                color=colors[name],
                hatch=hatches[i % len(hatches)],
                edgecolor='black',
                label=name if i == 0 else None
            )
            bottom += values

    # Labels and ticks
    ax.set_xticks(x + bar_width * (len(experiments) - 1) / 2)
    ax.set_xticklabels(benchmarks, rotation=45, ha='right')
    ax.set_ylabel("MPKI")
    ax.set_title("MPKI Breakdown per Benchmark (Per Experiment)")

    # Legends: colors encode components, hatches encode experiments
    component_handles = [plt.Rectangle((0, 0), 1, 1, color=colors[name]) for name in component_cols.values()]
    experiment_handles = [
        plt.Rectangle((0, 0), 1, 1, facecolor='white', edgecolor='black', hatch=hatches[i % len(hatches)])
        for i, exp in enumerate(experiments)
    ]
    all_handles = component_handles + experiment_handles
    all_labels = list(component_cols.values()) + experiments

    ax.legend(
        all_handles, all_labels,
        title="Component / Experiment",
        loc='best',
        bbox_to_anchor=(1.0, 0.5) if legend_out else None
    )

    plt.tight_layout()
    if len(save_as) > 0:
        plt.savefig(f"mpki_plots/{save_as}.png", bbox_inches='tight')
    plt.show()
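# --- Illustrative call of plotMPKIStack (NOT part of the committed notebook).
# It requires the *_mpki columns derived by loadAndPrepare, so sketched here on
# real data only:
#
#     plotMPKIStack(df, mean=True, rm_components=['icache_mpki'], show_means=True)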
def plotCPIStack(df, mean=False, experiment_like='', exclude_benchs=[], only_benchs=[],
                 m_cpi=False, detailed=True, save_as="", ymax=None):
    """Stacked top-down CPI breakdown per benchmark and experiment."""
    latest = df
    latest = filterBenchExperiment(latest, experiment_like, exclude_benchs, only_benchs)

    cpi_var = 'm_cpi' if m_cpi else 'cpi'

    if mean:
        latest = latest.groupby(['experiment', 'benchmark'])[[
            'TopDownl1_retiring', 'TopDownL1_frontendBound', 'TopDownL1_badSpeculation',
            'TopDownl1_backendBound', "TopDownL2_Mispredicts", "TopDownL2_machineClears",
            "TopDownL2_serializingStalls", "TopDownL2_coreBound", "TopDownL2_memoryBound",
            cpi_var
        ]].mean().reset_index(drop=False)
    else:
        latest = latest.loc[latest.groupby(['experiment', 'benchmark'])['finalTick'].idxmax()]

    # Define component columns and friendly display names
    component_cols_detailed = {
        "TopDownl1_retiring": "Retiring",
        "TopDownL1_frontendBound": "Frontend Bound",
        "TopDownL2_Mispredicts": "BadSpec Mispredicts",
        "TopDownL2_machineClears": "BadSpec memOrderViolations",
        "TopDownL2_serializingStalls": "Backend SerializingStalls",
        "TopDownL2_coreBound": "Backend coreBound",
        "TopDownL2_memoryBound": "Backend memoryBound"
    }

    component_cols_L1 = {
        "TopDownl1_retiring": "Retiring",
        "TopDownL1_frontendBound": "Frontend Bound",
        "TopDownL1_badSpeculation": "Bad Speculation",
        "TopDownl1_backendBound": "Backend Bound"
    }

    component_cols = component_cols_detailed if detailed else component_cols_L1

    # Multiply each component fraction by total CPI to get absolute cycles per instruction
    for col in component_cols:
        latest[col] = latest[col] * latest[cpi_var]

    # Prepare axis categories
    latest['benchmark'] = latest['benchmark'].astype(str)
    benchmarks = sorted(latest['benchmark'].unique())
    experiments = sorted(latest['experiment'].unique())

    # Define colors and hatches
    colors = {
        "Retiring": "#7B3294",
        "Frontend Bound": "#F0E442",
        "Bad Speculation": "#999999",
        "BadSpec Mispredicts": "#999999",
        "BadSpec memOrderViolations": "#0dd617",
        "Backend Bound": "#D7191C",
        "Backend SerializingStalls": "#f7760c",
        "Backend coreBound": "#1939d7",
        "Backend memoryBound": "#D7191C"
    }
    hatches = ['', '*', 'oo', 'X', 'x', '.', '.', '+']  # one per experiment

    # Plot
    fig, ax = plt.subplots(figsize=(16, 6))
    bar_width = 0.6 / len(experiments)
    x = np.arange(len(benchmarks))

    # Offset bars by experiment
    for i, exp in enumerate(experiments):
        subset = latest[latest['experiment'] == exp].set_index('benchmark')

        for j, b in enumerate(benchmarks):
            if b not in subset.index:
                continue

            total_cpi = subset.loc[b, cpi_var]
            bottom = 0.0
            drawn = 0.0

            for col, name in component_cols.items():
                value = subset.loc[b, col] if col in subset.columns else 0.0

                # Nothing left to allocate? skip
                if drawn >= total_cpi:
                    continue

                # If the component would overshoot the total CPI, clip it
                if drawn + value > total_cpi:
                    value = max(0, total_cpi - drawn)

                if value > 0:
                    ax.bar(
                        j + i * bar_width,
                        value,
                        bar_width,
                        bottom=bottom,
                        color=colors[name],
                        hatch=hatches[i % len(hatches)],
                        edgecolor='black',
                        label=name if (i == 0 and bottom == 0) else None
                    )

                # Update positions
                drawn += value
                bottom += value

    # Labels and ticks
    ax.set_xticks(x + bar_width * (len(experiments) - 1) / 2)
    ax.set_xticklabels(benchmarks, rotation=45, ha='right')
    ax.set_ylabel("CPI")
    ax.set_title("CPI Breakdown per Benchmark (Per Experiment)")
    if ymax:
        ax.set_ylim(0, ymax)

    # Component legend (colors)
    component_handles = [
        plt.Rectangle((0, 0), 1, 1, color=colors[name])
        for name in component_cols.values()
    ]

    # Experiment legend (hatch only, white fill to avoid visual conflict)
    experiment_handles = [
        plt.Rectangle((0, 0), 1, 1, facecolor='white', edgecolor='black', hatch=hatches[i % len(hatches)])
        for i, exp in enumerate(experiments)
    ]

    # Combine both in a unified legend just outside the plot on the right
    all_handles = component_handles + experiment_handles
    all_labels = list(component_cols.values()) + experiments

    ax.legend(
        all_handles,
        all_labels,
        title="Component / Experiment",
        loc='center left',
        bbox_to_anchor=(1.0, 0.5)
    )

    plt.tight_layout()
    if len(save_as) > 0:
        plt.savefig(f"CPI_Stacks/{save_as}.png", bbox_inches='tight')
    plt.show()

The notebook metadata declares a Python 3 (ipykernel) kernel, language Python 3.12.3, nbformat 4.5.