diff options
| author | Raphael Dichler <raphael@dichler.com> | 2025-05-11 20:03:31 +0200 |
|---|---|---|
| committer | Raphael Dichler <raphael@dichler.com> | 2025-05-11 20:03:31 +0200 |
| commit | 2370f0923f896623c279b005b96f51121fc29700 (patch) | |
| tree | 23a1aea824148307c9d67e3e6ebaa230736bdf98 | |
| parent | e546f71869e32e88716c4ad05e0254ea3352a668 (diff) | |
| download | research-work-archive-artifacts-2370f0923f896623c279b005b96f51121fc29700.tar.gz research-work-archive-artifacts-2370f0923f896623c279b005b96f51121fc29700.zip | |
add thesis
47 files changed, 2597 insertions, 0 deletions
diff --git a/archive/2024/winter/bsc_dichler/.gitignore b/archive/2024/winter/bsc_dichler/.gitignore new file mode 100644 index 000000000..6f562eb38 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/.gitignore @@ -0,0 +1,7 @@ +*.csv +**/results/ + +**/__pycache__/ + +result/** +!result/.gitkeep diff --git a/archive/2024/winter/bsc_dichler/LICENSE.md b/archive/2024/winter/bsc_dichler/LICENSE.md new file mode 100644 index 000000000..de5b07143 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/LICENSE.md @@ -0,0 +1,7 @@ +Copyright 2025 Raphael Dichler + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/archive/2024/winter/bsc_dichler/README.md b/archive/2024/winter/bsc_dichler/README.md new file mode 100644 index 000000000..6cf7f97d2 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/README.md @@ -0,0 +1,72 @@ +# Memory Tagging Extension + +This tool and benchmark are designed to evaluate the performance overhead introduced with the usage of MTE. 
+ +## Usage + +You can run the existing benchmark both locally and on a remote machine. The software requires certain dependencies to run: +- Makefile +- gcc (with support for `-march=armv8-5-a+memtag`) +- rsync + +### Benchmarks + +#### Locally + +To run any experiment locally, use the following command: +```bash +./scripts/main.py --experiment <experiment> locally +``` + +#### Remote + +To run the experiments on a remote machine, valid SSH keys must be established. The connection must allow allocating a TTY to execute and build the benchmark. Moreover, the remote machine must have the required software installed. + +From the root directory of the repo, run any benchmark with the following command: +```bash +./scripts/main.py --experiment <experiment> remote --remote-user <ssh-user> --remote-host <ssh-host> +``` + +If an SSH jump host is needed, use the following command: +```bash +./scripts/main.py --experiment <experiment> remote --remote-user <ssh-user> --remote-host <ssh-host> --remote-jump-user <ssh-jump-user> --remote-jump-host <ssh-jump-host> --remote-jump-port <ssh-jump-port> +``` + +### Additional Information + +- By default, experiments are extracted from `/experiments/`. If your experiment is located in another directory, specify the base directory with `--base`. +- By default, all results of the experiments are placed into `/results`. To change this behavior, specify `--result`. +- When an experiment is executed, a context on the machine is created. This context is a temporary directory on the system. By default, after the execution of the benchmark, this directory is not cleaned up. To enable cleanup, specify `--cleanup`. + +## Structure + +The structure of the experiment follows this pattern: + +- Experiments are placed inside the `--base` directory (default: `/experiments/`) and are referred to by the name of the experiment. For example, if we have an experiment named `my-experiment`, a directory inside `/experiments/my-experiment/` must exist. 
+- On execution of an experiment, the `--base` directory is synced to the context of the current run, then: + - We run the Makefile inside this folder. This Makefile must perform all steps required to run the experiment (like compiling and executing the actual experiment). + - The results produced by the experiment are placed into `/experiments/my-experiment/results/`. These results are then synced back to the results directory specified (`/results/my-experiment/`). + - The results are processed. + +## Adding Additional Experiments + +To add a custom experiment called `my-experiment`, follow these steps. Assume that the standard `--base` directory (`/experiments/`) is used. + +1. Create a new directory: + ```bash + cd <root-of-repo>/experiments/ + mkdir my-experiment + ``` + +2. Add a Makefile inside the directory (this will be called to perform any necessary work). For example, use `make` to compile and run your experiment. + +3. Add a Python script to evaluate the results of the benchmark. + +4. Add the name of your experiment to the literals specified in this file: + ```python + Experiments = Literal[ ... "my-experiment" ] + ``` + +5. To add your analysis step, include `"my-experiment"` in the relevant section. How you evaluate the results is up to you, but you must add a function call to the `experiments` dictionary. + +Now you can run your experiment! 
diff --git a/archive/2024/winter/bsc_dichler/experiments/.gitignore b/archive/2024/winter/bsc_dichler/experiments/.gitignore new file mode 100644 index 000000000..0954694ab --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/.gitignore @@ -0,0 +1,2 @@ +*.csv +**/results/ diff --git a/archive/2024/winter/bsc_dichler/experiments/allocator/allocator.c b/archive/2024/winter/bsc_dichler/experiments/allocator/allocator.c new file mode 100644 index 000000000..67681c923 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/allocator/allocator.c @@ -0,0 +1,81 @@ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include <sys/auxv.h> +#include <sys/prctl.h> + +#ifdef MTE +#include <arm_acle.h> +#endif + +#ifdef ASYNC +#define MTE_MODE PR_MTE_TCF_ASYNC +#else +#define MTE_MODE PR_MTE_TCF_SYNC +#endif + +#define insert_random_tag(ptr) ({ \ + uint64_t __val; \ + asm("irg %0, %1" : "=r" (__val) : "r" (ptr)); \ + __val; \ +}) + +extern void *mtag_tag_region(void *ptr, size_t size); + +void *alloc(size_t num_bytes) { +#ifdef MTE + unsigned long hwcap2 = getauxval(AT_HWCAP2); + + /* check if MTE is present */ + if (!(hwcap2 & HWCAP2_MTE)) { + perror("MTE is not present"); + exit(EXIT_FAILURE); + } + + /* + * Enable MTE with synchronous checking + */ + if (prctl(PR_SET_TAGGED_ADDR_CTRL, + PR_TAGGED_ADDR_ENABLE | MTE_MODE | (0xfffe << PR_MTE_TAG_SHIFT), + 0, 0, 0)) { + perror("prctl() failed"); + exit(EXIT_FAILURE); + } +#endif + +#ifdef MTE + void *mem = mmap(NULL, num_bytes, PROT_READ | PROT_WRITE | PROT_MTE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#else + void *mem = mmap(NULL, num_bytes, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#endif + + if (mem == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + +#ifdef MTE + /* Tag the mmap area */ + void *p = __arm_mte_create_random_tag(mem, 0); + mem = mtag_tag_region(p, num_bytes); +#endif + + // ensure every page is loaded before
benchmarking + uint8_t *pp = mem; + for (size_t i = 0; i < num_bytes; i += 1024) { + pp[i]++; + } + + return mem; +} + +void alloc_free(void *ptr, size_t num_bytes) { + if (munmap(ptr, num_bytes) == -1) { + perror("mmap"); + exit(EXIT_FAILURE); + } +} diff --git a/archive/2024/winter/bsc_dichler/experiments/allocator/allocator.h b/archive/2024/winter/bsc_dichler/experiments/allocator/allocator.h new file mode 100644 index 000000000..cf4c4447f --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/allocator/allocator.h @@ -0,0 +1,11 @@ + +#ifndef ALLOCATOR_H +#define ALLOCATOR_H + +#include <stddef.h> + +void *alloc(size_t num_bytes); + +void alloc_free(void *ptr, size_t num_bytes); + +#endif diff --git a/archive/2024/winter/bsc_dichler/experiments/allocator/tag_region.S b/archive/2024/winter/bsc_dichler/experiments/allocator/tag_region.S new file mode 100644 index 000000000..7f5c37475 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/allocator/tag_region.S @@ -0,0 +1,106 @@ +/* Copyright (C) 2020-2025 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Assumptions: + * + * ARMv8-a, AArch64, MTE, LP64 ABI. + * + * Interface contract: + * Address is 16 byte aligned and size is multiple of 16. + * Returns the passed pointer. 
+ * The memory region may remain untagged if tagging is not enabled. + */ +.arch armv8.5-a +.arch_extension memtag + +.global mtag_tag_region + +#define dstin x0 +#define count x1 +#define dst x2 +#define dstend x3 +#define tmp x4 +#define zva_val x4 + +mtag_tag_region: + add dstend, dstin, count + + cmp count, 96 + b.hi set_long + + tbnz count, 6, set96 + + /* Set 0, 16, 32, or 48 bytes. */ + lsr tmp, count, 5 + add tmp, dstin, tmp, lsl 4 + cbz count, end + stg dstin, [dstin] + stg dstin, [tmp] + stg dstin, [dstend, -16] +end: + ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +set96: + st2g dstin, [dstin] + st2g dstin, [dstin, 32] + st2g dstin, [dstend, -32] + ret + + .p2align 4 + /* Size is > 96 bytes. */ +set_long: + cmp count, 160 + b.lo no_zva + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne no_zva +#endif + st2g dstin, [dstin] + st2g dstin, [dstin, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + .p2align 4 +zva_loop: + add dst, dst, 64 + dc gva, dst + subs count, count, 64 + b.hi zva_loop + st2g dstin, [dstend, -64] + st2g dstin, [dstend, -32] + ret + +no_zva: + sub dst, dstin, 32 /* Dst is biased by -32. */ + sub count, count, 64 /* Adjust count for loop. */ +no_zva_loop: + st2g dstin, [dst, 32] + st2g dstin, [dst, 64]! 
+ subs count, count, 64 + b.hi no_zva_loop + st2g dstin, [dstend, -64] + st2g dstin, [dstend, -32] + ret + diff --git a/archive/2024/winter/bsc_dichler/experiments/cas/Makefile b/archive/2024/winter/bsc_dichler/experiments/cas/Makefile new file mode 100644 index 000000000..52c0718b1 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/cas/Makefile @@ -0,0 +1,22 @@ +CC = gcc +CFLAGS = -O3 +CFLAGS += -Wa,--noexecstack + +default: + make clean run run_mte + bash benchmark.sh + +run: + $(CC) $(CFLAGS) -o run \ + main.c \ + ../allocator/allocator.c \ + +run_mte: + $(CC) $(CFLAGS) -o run_mte \ + -DMTE -march=armv8.5-a+memtag \ + main.c \ + ../allocator/allocator.c \ + ../allocator/tag_region.S \ + +clean: + rm -rf run run_mte results/ *.png diff --git a/archive/2024/winter/bsc_dichler/experiments/cas/benchmark.sh b/archive/2024/winter/bsc_dichler/experiments/cas/benchmark.sh new file mode 100644 index 000000000..f8537e49b --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/cas/benchmark.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -o errexit # when a command fails, exit +set -o nounset # fail when accessing an unset variable +set -o pipefail # fail pipeline if any command errors + +ARRAY_SIZES=(1 2 3 4) + +run_experiment() { + local executable=$1 + local output_file=$2 + + rm -f "$output_file" + touch "$output_file" + echo "cores;duration" >> "$output_file" + for size in "${ARRAY_SIZES[@]}"; do + taskset -c 4,5,6,7 "$executable" 10 "$size" | tee -a "$output_file" + sleep 5s + done +} + +rm -rf results +mkdir results + +run_experiment "./run" "results/run.csv" +run_experiment "./run_mte" "results/run_mte.csv" diff --git a/archive/2024/winter/bsc_dichler/experiments/cas/main.c b/archive/2024/winter/bsc_dichler/experiments/cas/main.c new file mode 100644 index 000000000..a24269e31 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/cas/main.c @@ -0,0 +1,90 @@ +#include <stdatomic.h> +#include <stdbool.h> +#include <stddef.h> +#include 
<stdint.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <sys/types.h> +#include <time.h> +#include <pthread.h> +#include <stdatomic.h> +#include "../allocator/allocator.h" + +atomic_int *shared_var; +pthread_barrier_t barrier; + +#define NUM_ITERATION 100000000 + +static void* cas_benchmark(void* arg) { + int expected; + pthread_barrier_wait(&barrier); + + for (int i = 0; i < NUM_ITERATION; i++) { + expected = atomic_load(shared_var); + while (!atomic_compare_exchange_weak(shared_var, &expected, expected + 1)) { + expected = atomic_load(shared_var); + } + } + + return NULL; +} + + +struct run_args { + uint64_t duration; +}; + +static void *run(void *args) { + struct run_args *a = (struct run_args *) args; + pthread_barrier_wait(&barrier); + + struct timespec s, e; + clock_gettime(CLOCK_MONOTONIC_RAW, &s); + cas_benchmark(NULL); + clock_gettime(CLOCK_MONOTONIC_RAW, &e); + + a->duration = 1e9 * (e.tv_sec - s.tv_sec) + (e.tv_nsec - s.tv_nsec); + + return NULL; +} + + +int main(int argc, char *args[]) { + if (argc != 3) { + printf("Usage: %s <iterations> <threads>\n", args[0]); + exit(EXIT_FAILURE); + } + + size_t iterations = atoll(args[1]); + size_t num_threads = atoll(args[2]); + + for (size_t i = 0; i < iterations; ++i) { + shared_var = alloc(16); + *shared_var = 0; + + pthread_barrier_init(&barrier, NULL, num_threads); + + struct run_args threads_args[num_threads]; + pthread_t threads[num_threads]; + + for (size_t j = 0; j < num_threads; ++j) { + pthread_create(&threads[j], NULL, run, &threads_args[j]); + } + + for (size_t j = 0; j < num_threads; ++j) { + pthread_join(threads[j], NULL); + } + assert(*shared_var == num_threads * NUM_ITERATION); + + uint64_t duration = 0; + for (size_t j = 0; j < num_threads; ++j) { + duration += threads_args[j].duration; + } + + printf("%ld;%ld\n", num_threads, duration); + + alloc_free(shared_var, 16); + } + +} diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous/Makefile 
b/archive/2024/winter/bsc_dichler/experiments/contiguous/Makefile new file mode 100644 index 000000000..ee32b1d33 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous/Makefile @@ -0,0 +1,32 @@ +CC=gcc +CFLAGS=-O3 +CFLAGS += -Wa,--noexecstack + +default: + make clean load16 load16_mte + bash benchmark.sh + +load16: + $(CC) $(CFLAGS) -o load16 \ + main.c \ + ../allocator/allocator.c \ + load.S + +load16_mte: + $(CC) $(CFLAGS) -o load16_mte \ + -DMTE -march=armv8.5-a+memtag \ + main.c \ + ../allocator/allocator.c \ + ../allocator/tag_region.S \ + load.S + +load16_mte_async: + $(CC) $(CFLAGS) -o load16_mte_async \ + -DMTE -DASYNC -march=armv8.5-a+memtag \ + main.c \ + ../allocator/allocator.c \ + ../allocator/tag_region.S \ + load.S + +clean: + rm -f load_mte_async load16 load16_mte diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous/benchmark.sh b/archive/2024/winter/bsc_dichler/experiments/contiguous/benchmark.sh new file mode 100755 index 000000000..66279a03a --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous/benchmark.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -o errexit # when a command fails, exit +set -o nounset # fail when accessing an unset variable +set -o pipefail # fail pipeline if any command errors + +CORE=5 + +for ((i = 0; i <= 19; i++)); do + ARRAY_SIZES16+=($(( (2**i) * 1024 / 64 ))) +done + +run_experiment() { + local executable=$1 + local output_file=$2 + + rm -f "$output_file" + touch "$output_file" + echo "len;duration;runs" >> "$output_file" + + for size in "${ARRAY_SIZES16[@]}"; do + taskset -c "$CORE" "$executable" 10 "$size" 1 | tee -a "$output_file" + sleep 5s + done +} + +rm -rf results +mkdir results + +run_experiment "./load16" "results/load16.csv" +run_experiment "./load16_mte" "results/load16_mte.csv" + diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous/load.S b/archive/2024/winter/bsc_dichler/experiments/contiguous/load.S new file mode 100644 index 
000000000..ff0df1217 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous/load.S @@ -0,0 +1,20 @@ + +#ifdef __x86_64__ +.intel_syntax noprefix +#endif + +.global benchmark + +benchmark: +#ifdef __aarch64__ + cbz x1, .LBB11_2 +.LBB11_1: + + ldr w8, [x0] + subs x1, x1, #1 + + str w8, [x0], #4 + b.ne .LBB11_1 +.LBB11_2: + ret +#endif diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous/main.c b/archive/2024/winter/bsc_dichler/experiments/contiguous/main.c new file mode 100644 index 000000000..87de6c418 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous/main.c @@ -0,0 +1,49 @@ +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <time.h> +#include "../allocator/allocator.h" + +struct node { + uint64_t val; + uint8_t arr[16 - sizeof(uint64_t)]; +}; +static_assert(sizeof(struct node) == 16, "cannot run experiment, size of node is not 16"); + +extern void benchmark(uint32_t *ptr, size_t len); + +#define WARUM_UP 5 + +int main(int argc, char *args[]) { + if (argc != 4) { + printf("Usage: %s <iterations> <len> <runs>\n", args[0]); + exit(EXIT_FAILURE); + } + + size_t iterations = atoll(args[1]); + size_t len = atoll(args[2]); + size_t runs = atoll(args[3]); + + // we only test with lengths of power of 2 - this is done to simplify the benchmark + assert((len & (len - 1)) == 0); + + for (size_t i = 0; i < iterations + WARUM_UP; ++i) { + size_t num_bytes = len * 64; + uint8_t *region = alloc(num_bytes); + + struct timespec s, e; + clock_gettime(CLOCK_MONOTONIC_RAW, &s); + benchmark((uint32_t *)region, num_bytes / 4); + clock_gettime(CLOCK_MONOTONIC_RAW, &e); + + if (i > WARUM_UP) { + uint64_t duration = 1e9 * (e.tv_sec - s.tv_sec) + (e.tv_nsec - s.tv_nsec); + printf("%ld;%ld;%ld\n", len, duration, runs); + } + + alloc_free(region, num_bytes); + } + +} diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/Makefile 
b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/Makefile new file mode 100644 index 000000000..4e908bd1c --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/Makefile @@ -0,0 +1,47 @@ +CC=gcc +CFLAGS=-O3 +CFLAGS += -Wa,--noexecstack + +default: + make clean tag_malloc tag_stg tag_st2g load store ldg + bash benchmark.sh + +tag_malloc: + $(CC) $(CFLAGS) -o tag_malloc \ + -DMTE -DMALLOC -march=armv8.5-a+memtag \ + main.c \ + ../allocator/tag_region.S + +tag_stg: + $(CC) $(CFLAGS) -o tag_stg \ + -DMTE -DSTG -march=armv8.5-a+memtag \ + stg.S \ + main.c + +tag_st2g: + $(CC) $(CFLAGS) -o tag_st2g \ + -DMTE -DST2G -march=armv8.5-a+memtag \ + st2g.S \ + main.c + +ldg: + $(CC) $(CFLAGS) -o ldg \ + -DMTE -DLDG -march=armv8.5-a+memtag \ + ../allocator/tag_region.S \ + ldg.S \ + main.c + +load: + $(CC) $(CFLAGS) -o load \ + -DLOAD \ + load.S \ + main.c + +store: + $(CC) $(CFLAGS) -o store \ + -DSTORE \ + store.S \ + main.c + +clean: + rm -rf results/ *.png tag_malloc tag_stg tag_st2g load store ldg diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/benchmark.sh b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/benchmark.sh new file mode 100644 index 000000000..a400d1a46 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/benchmark.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -o errexit # when a command fails, exit +set -o nounset # fail when accessing an unset variable +set -o pipefail # fail pipeline if any command errors + +ARRAY=(536870912) +CORE=5 + +run_experiment() { + local executable=$1 + local output_file=$2 + + rm -f "$output_file" + touch "$output_file" + echo "size;duration;ops;processed" >> "$output_file" + for size in "${ARRAY[@]}"; do + taskset -c "$CORE" "$executable" 10 "$size" | tee -a "$output_file" + done +} + +rm -rf results +mkdir results + +run_experiment "./tag_stg" "results/tag_stg.csv" +run_experiment "./tag_st2g" 
"results/tag_st2g.csv" +run_experiment "./tag_malloc" "results/tag_malloc.csv" +run_experiment "./load" "results/load.csv" +run_experiment "./store" "results/store.csv" +run_experiment "./ldg" "results/ldg.csv" +run_experiment "./irg" "results/irg.csv" diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/ldg.S b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/ldg.S new file mode 100644 index 000000000..0209102ad --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/ldg.S @@ -0,0 +1,13 @@ + +.global benchmark_ldg + +benchmark_ldg: + cbz x1, .LBB8_2 +.LBB8_1: + subs x1, x1, #16 + + ldg x4, [x0] + add x0, x0, #16 + b.ne .LBB8_1 +.LBB8_2: + ret diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/load.S b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/load.S new file mode 100644 index 000000000..3d857389f --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/load.S @@ -0,0 +1,31 @@ + +.global benchmark_load + +benchmark_load: + /* + cbz x2, .LBB4_3 + mov x8, xzr + sub x9, x1, #1 +.LBB4_2: + and x10, x8, x9 + add x8, x8, #1 + lsl x9, x10, #2 + cmp x2, x8 + + ldr x11, [x0, x10] + + b.ne .LBB4_2 +.LBB4_3: + ret + + */ + + cbz x1, .LBB8_2 +.LBB8_1: + ldr w8, [x0] + add x0, x0, #4 + + subs x1, x1, #1 + b.ne .LBB8_1 +.LBB8_2: + ret diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/main.c b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/main.c new file mode 100644 index 000000000..fa481414c --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/main.c @@ -0,0 +1,176 @@ +#include <stddef.h> +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/mman.h> +#include <stdint.h> + +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <time.h> + +#ifdef MTE +#include <arm_acle.h> +#endif + +#define insert_random_tag(ptr) ({ \ + uint64_t __val; \ + asm("irg 
%0, %1" : "=r" (__val) : "r" (ptr)); \ + __val; \ +}) + +#ifdef ASYNC +#define MTE_MODE PR_MTE_TCF_ASYNC +#else +#define MTE_MODE PR_MTE_TCF_SYNC +#endif + +#define set_tag(tagged_addr) do { \ + asm volatile("stg %0, [%0]" : : "r" (tagged_addr) : "memory"); \ +} while (0) + + +#define set_tag32(tagged_addr) do { \ + asm volatile("st2g %0, [%0]" : : "r" (tagged_addr) : "memory"); \ +} while (0) + +#ifdef MALLOC +extern void *mtag_tag_region(void *ptr, size_t size); +#endif + +#ifdef STG +extern void benchmark_stg(uint8_t *ptr, size_t len); +#endif + +#ifdef ST2G +void benchmark_st2g(uint8_t *ptr, size_t len); +#endif + +#ifdef LOAD +extern void benchmark_load(uint32_t *arr, size_t len); +#endif + +#ifdef STORE +extern void benchmark_store(uint32_t *arr, size_t len, size_t add); +#endif + +#ifdef LDG +extern void benchmark_ldg(uint8_t *ptr, size_t len); +extern void *mtag_tag_region(void *ptr, size_t size); +#endif + +#ifdef IRG +extern void benchmark_irg(uint8_t *ptr, size_t len); +#endif + + +void benchmark(size_t bytes) { +#ifdef MTE + void *mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE | PROT_MTE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#else + void *mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#endif + uint8_t *pp = mem; + for (size_t i = 0; i < bytes; i += 1024) { + pp[i]++; + } +#ifdef LDG + void *p = __arm_mte_create_random_tag(mem, 0); + mem = mtag_tag_region(mem, bytes); +#endif + + size_t bytes_processed = 0; + size_t ops = 0; + + struct timespec s, e; + clock_gettime(CLOCK_MONOTONIC_RAW, &s); +#ifdef MALLOC + void *p = __arm_mte_create_random_tag(mem, 0); + mem = mtag_tag_region(mem, bytes); + ops = bytes / 32; + bytes_processed = bytes / 16; +#endif + +#ifdef STG + benchmark_stg(mem, bytes); + ops = bytes / 16; + bytes_processed = bytes / 16; +#endif + +#ifdef ST2G + benchmark_st2g(mem, bytes); + ops = bytes / 32; + bytes_processed = bytes / 16; +#endif + +#ifdef LOAD + benchmark_load((uint32_t *) pp, bytes / 4); + 
ops = bytes / 4; + bytes_processed = bytes; +#endif + +#ifdef STORE + benchmark_store((uint32_t *) pp, bytes / 4, 37); + ops = bytes / 4; + bytes_processed = bytes; +#endif + +#ifdef LDG + benchmark_ldg(mem, bytes); + ops = bytes / 16; + bytes_processed = bytes / 16; +#endif + +#ifdef IRG + benchmark_irg(mem, bytes); + ops = bytes / 4; + bytes_processed = bytes; +#endif + + clock_gettime(CLOCK_MONOTONIC_RAW, &e); + + assert(bytes_processed > 0); + assert(ops > 0); + uint64_t duration = 1e9 * (e.tv_sec - s.tv_sec) + (e.tv_nsec - s.tv_nsec); + printf("%ld;%ld;%ld;%ld\n", bytes, duration, ops, bytes_processed); + + if (munmap(mem, bytes) == -1) { + perror("mmap"); + exit(EXIT_FAILURE); + } +} + + +int main(int argc, char *args[]) { + if (argc != 3) { + printf("Usage: %s <iterations> <size>\n", args[0]); + exit(EXIT_FAILURE); + } + +#ifdef MTE + unsigned long hwcap2 = getauxval(AT_HWCAP2); + + /* check if MTE is present */ + if (!(hwcap2 & HWCAP2_MTE)) { + perror("MTE is not present"); + exit(EXIT_FAILURE); + } + + /* + * Enable MTE with synchronous checking + */ + if (prctl(PR_SET_TAGGED_ADDR_CTRL, + PR_TAGGED_ADDR_ENABLE | MTE_MODE | (0xfffe << PR_MTE_TAG_SHIFT), + 0, 0, 0)) { + perror("prctl() failed"); + exit(EXIT_FAILURE); + } +#endif + + size_t iterations = atoll(args[1]); + size_t size = atoll(args[2]); + + for (size_t i = 0; i < iterations; ++i) { + benchmark(size); + } +} diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/st2g.S b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/st2g.S new file mode 100644 index 000000000..799214291 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/st2g.S @@ -0,0 +1,13 @@ + +.global benchmark_st2g + +benchmark_st2g: + cbz x1, .LBB8_2 +.LBB8_1: + subs x1, x1, #32 + + st2g x0, [x0] + add x0, x0, #32 + b.ne .LBB8_1 +.LBB8_2: + ret diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/stg.S 
b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/stg.S new file mode 100644 index 000000000..bed32ccf3 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/stg.S @@ -0,0 +1,13 @@ + +.global benchmark_stg + +benchmark_stg: + cbz x1, .LBB8_2 +.LBB8_1: + subs x1, x1, #16 + + stg x0, [x0] + add x0, x0, #16 + b.ne .LBB8_1 +.LBB8_2: + ret diff --git a/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/store.S b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/store.S new file mode 100644 index 000000000..2eae8084d --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/contiguous_tagging/store.S @@ -0,0 +1,29 @@ + +.global benchmark_store + +benchmark_store: + /* + cbz x1, .LBB2_3 + mov x8, xzr + sub x9, x1, #1 +.LBB2_2: + and x10, x8, x9 + subs x1, x1, #1 + add x8, x8, #4 + + str w10, [x0, x10, lsl #2] + + b.ne .LBB2_2 +.LBB2_3: + ret + */ + cbz x1, .LBB8_2 +.LBB8_1: + subs x1, x1, #1 + + str w2, [x0] + add x0, x0, #4 + + b.ne .LBB8_1 +.LBB8_2: + ret diff --git a/archive/2024/winter/bsc_dichler/experiments/malloc/Makefile b/archive/2024/winter/bsc_dichler/experiments/malloc/Makefile new file mode 100644 index 000000000..9de6ef289 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/malloc/Makefile @@ -0,0 +1,25 @@ +CC=gcc +CFLAGS=-O3 +CFLAGS += -Wa,--noexecstack + +default: + make clean malloc malloc_mte + bash benchmark.sh + +malloc: + $(CC) $(CFLAGS) -o malloc \ + -DALLOCATIONS=16777216 \ + main.c \ + ../allocator/allocator.c + +malloc_mte: + $(CC) $(CFLAGS) -o malloc_mte \ + -DALLOCATIONS=16777216 \ + -DMTE -march=armv8.5-a+memtag \ + main.c \ + ../allocator/tag_region.S \ + ../allocator/allocator.c + + +clean: + rm -rf results/ malloc malloc_mte diff --git a/archive/2024/winter/bsc_dichler/experiments/malloc/benchmark.sh b/archive/2024/winter/bsc_dichler/experiments/malloc/benchmark.sh new file mode 100755 index 000000000..9b640d6c2 --- /dev/null +++ 
b/archive/2024/winter/bsc_dichler/experiments/malloc/benchmark.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -o errexit # when a command fails, exit +set -o nounset # fail when accessing an unset variable +set -o pipefail # fail pipeline if any command errors + +ARRAY=(16 128 256 1024 2048 4096 8192) +CORE=5 + +run_experiment() { + local executable=$1 + local output_file=$2 + local option=$3 + + rm -f "$output_file" + touch "$output_file" + echo "size;allocation;duration_allocation;duration_deallocation" >> "$output_file" + + for size in "${ARRAY[@]}"; do + MEMTAG_OPTIONS="$option" taskset -c "$CORE" "$executable" 9 "$size" | tee -a "$output_file" + done +} + +rm -rf results +mkdir results + +run_experiment "./malloc" "results/malloc.csv" "off" +run_experiment "./malloc_mte" "results/malloc_mte.csv" "sync" diff --git a/archive/2024/winter/bsc_dichler/experiments/malloc/main.c b/archive/2024/winter/bsc_dichler/experiments/malloc/main.c new file mode 100644 index 000000000..58932af78 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/malloc/main.c @@ -0,0 +1,83 @@ +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <time.h> +#include "../allocator/allocator.h" + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> + +#define PR_TAGGED_ADDR_ENABLE (1UL << 0) + +#define WARUM_UP 1 + +struct bench { + size_t allocation; + size_t bytes_to_alloc; + void *ptrs[ALLOCATIONS]; +}; + +void benchmark(struct bench *bench) { + for (size_t i = 0; i < bench->allocation; ++i) { + bench->ptrs[i] = malloc(bench->bytes_to_alloc); + } +} + +int main(int argc, char *args[]) { + if (argc != 3) { + printf("Usage: %s <iterations> <size>\n", args[0]); + exit(EXIT_FAILURE); + } + + 
size_t iterations = atoll(args[1]); + size_t size = atoll(args[2]); + + size_t allocations = (1 << 28) / size; + // ensure size is a multiple of 16, to match granularity + assert(size % 16 == 0); + assert(allocations * size < (1 << 29)); + + void *ptr = malloc(1024); + + struct bench *bench= alloc(sizeof(struct bench)); + bench->allocation = allocations; + bench->bytes_to_alloc = size; + for (size_t i = 0; i < iterations + WARUM_UP; ++i) { + + struct timespec s, e1, e2; + clock_gettime(CLOCK_MONOTONIC_RAW, &s); + benchmark(bench); + clock_gettime(CLOCK_MONOTONIC_RAW, &e1); + + for (size_t j = 0; j < bench->allocation; ++j) { + free(bench->ptrs[j]); + } + + clock_gettime(CLOCK_MONOTONIC_RAW, &e2); + + if (i > WARUM_UP) { + uint64_t duration_allocation = 1e9 * (e1.tv_sec - s.tv_sec) + (e1.tv_nsec - s.tv_nsec); + uint64_t duration_deallocation = 1e9 * (e2.tv_sec - e1.tv_sec) + (e2.tv_nsec - e1.tv_nsec); + + printf("%ld;%ld;%ld;%ld\n", size, allocations, duration_allocation, duration_deallocation); + } + + } + + alloc_free(bench, sizeof(struct bench)); + free(ptr); +} diff --git a/archive/2024/winter/bsc_dichler/experiments/non_contiguous/Makefile b/archive/2024/winter/bsc_dichler/experiments/non_contiguous/Makefile new file mode 100644 index 000000000..f9e844482 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/non_contiguous/Makefile @@ -0,0 +1,35 @@ +CC=gcc +CFLAGS=-O3 +CFLAGS += -Wa,--noexecstack + +default: + make clean load load_mte load_mte_async + bash benchmark.sh + +load_mte: + $(CC) $(CFLAGS) -o load_mte \ + -DMTE -march=armv8.5-a+memtag \ + main.c \ + ../random_selection/random_selector.c \ + ../allocator/allocator.c \ + ../allocator/tag_region.S \ + load.S + +load_mte_async: + $(CC) $(CFLAGS) -o load_mte_async \ + -DMTE -DASYNC -march=armv8.5-a+memtag \ + main.c \ + ../random_selection/random_selector.c \ + ../allocator/allocator.c \ + ../allocator/tag_region.S \ + load.S + +load: + $(CC) $(CFLAGS) -o load \ + main.c \ + 
# Run one benchmark binary for every entry of ARRAY_SIZES and collect the
# rows it prints into a CSV file.
#   $1  path of the benchmark executable
#   $2  path of the CSV file to (re)create
# The executable is pinned to $CORE and called with 10 iterations, the
# current size and the fixed seed 1337; its output is shown and appended.
run_experiment() {
    local executable="$1"
    local output_file="$2"

    # start from a fresh file that contains only the CSV header
    rm -f "$output_file"
    echo "len;duration" > "$output_file"

    for size in "${ARRAY_SIZES[@]}"; do
        taskset -c "$CORE" "$executable" 10 "$size" 1337 | tee -a "$output_file"
        # pause between runs
        sleep 5s
    done
}
+++ b/archive/2024/winter/bsc_dichler/experiments/non_contiguous/main.c @@ -0,0 +1,84 @@ +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <time.h> +#include "../allocator/allocator.h" +#include "../random_selection/random_selector.h" + +struct node { + struct node *next; + uint8_t arr[16 - sizeof(struct node *)]; +}; +static_assert(sizeof(struct node) == 16, "cannot run experiment, size of node is not 16"); + +struct experiment { + uint8_t *region; + struct node *start; + size_t num_nodes; +}; + +static struct experiment node_init(size_t num_nodes, size_t seed) { + assert(num_nodes != 0); + uint8_t *region = alloc(num_nodes * 16); + struct node *node_arr = (struct node *) region; + + random_selector_setup(seed); + selector_t s = random_selector_init(num_nodes); + + uint64_t idx = random_selector_pop(&s); + struct node *start = &node_arr[idx]; + struct node *prev = start; + + while (random_selector_is_empty(s) == false) { + idx = random_selector_pop(&s); + struct node *n = &node_arr[idx]; + + prev->next = n; + prev = n; + } + prev->next = start; + + random_selector_deinit(s); + + return (struct experiment) { + .region = region, + .start = start, + .num_nodes = num_nodes, + }; + +} + +static void node_deinit(struct experiment experiment) { + alloc_free(experiment.region, experiment.num_nodes); +} + +extern void benchmark(struct node *start, size_t steps); + +int main(int argc, char *args[]) { + if (argc != 4) { + printf("Usage: %s <iterations> <len> <seed>\n", args[0]); + exit(EXIT_FAILURE); + } + + size_t iterations = atoll(args[1]); + size_t len = atoll(args[2]); + size_t seed = atoll(args[3]); + + for (size_t i = 0; i < iterations; ++i) { + struct experiment exp = node_init(len, seed); + + struct timespec s, e; + clock_gettime(CLOCK_MONOTONIC_RAW, &s); + benchmark(exp.start, 100000000); + clock_gettime(CLOCK_MONOTONIC_RAW, &e); + + uint64_t duration = 1e9 * (e.tv_sec - s.tv_sec) 
CC=gcc
CFLAGS=-O3
CFLAGS += -Wa,--noexecstack

# `default` and `clean` are commands, not files.
.PHONY: default clean

# Rebuild every binary that benchmark.sh executes and then run it.
# benchmark.sh runs ./write, ./write_mte, ./load and ./load_mte; building only
# the two write binaries after `clean` made the script abort at ./load.
# $(MAKE) keeps flags and the jobserver of the outer make invocation.
default:
	$(MAKE) clean write write_mte load load_mte
	bash benchmark.sh

# Read-only pointer chasing (assembly loop in load.S) with MTE enabled.
load_mte:
	$(CC) $(CFLAGS) -o load_mte \
		-DREAD_ONLY -DMTE -march=armv8.5-a+memtag \
		main.c \
		../random_selection/random_selector.c \
		../allocator/allocator.c \
		../allocator/tag_region.S \
		load.S

# Read-only pointer chasing without MTE.
load:
	$(CC) $(CFLAGS) -o load \
		-DREAD_ONLY \
		main.c \
		../random_selection/random_selector.c \
		../allocator/allocator.c \
		load.S

# Pointer chasing that also writes to the visited nodes, without MTE.
write:
	$(CC) $(CFLAGS) -o write \
		-DREAD_WRITE \
		main.c \
		../random_selection/random_selector.c \
		../allocator/allocator.c

# Pointer chasing that also writes to the visited nodes, with MTE enabled.
write_mte:
	$(CC) $(CFLAGS) -o write_mte \
		-DREAD_WRITE -DMTE -march=armv8.5-a+memtag \
		main.c \
		../random_selection/random_selector.c \
		../allocator/allocator.c \
		../allocator/tag_region.S

clean:
	rm -rf results/ *.png load load_mte write write_mte
2097152 4194304 8388608 16777216 33554432 50331648 67108864) +THREADS=(1 2 3 4) + +run_experiment() { + local executable=$1 + local output_file=$2 + + rm -f "$output_file" + touch "$output_file" + echo "len;duration;threads" >> "$output_file" + for num_threads in "${THREADS[@]}"; do + for size in "${ARRAY_SIZES[@]}"; do + taskset -c "$CORE" "$executable" 10 "$size" 1337 "$num_threads" | tee -a "$output_file" + sleep 5s + done + done +} + +rm -rf results +mkdir results + +run_experiment "./write" "results/write.csv" +run_experiment "./write_mte" "results/write_mte.csv" + +run_experiment "./load" "results/load.csv" +run_experiment "./load_mte" "results/load_mte.csv" diff --git a/archive/2024/winter/bsc_dichler/experiments/parallel_non_contiguous/load.S b/archive/2024/winter/bsc_dichler/experiments/parallel_non_contiguous/load.S new file mode 100644 index 000000000..dc35609db --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/parallel_non_contiguous/load.S @@ -0,0 +1,19 @@ +#ifdef __x86_64__ +.intel_syntax noprefix +#endif + +.global benchmark + + +benchmark: +#ifdef __aarch64__ + cbz x1, .LBB2_2 +.LBB2_1: + subs x1, x1, #1 + + ldr x0, [x0] + + b.ne .LBB2_1 +.LBB2_2: + ret +#endif diff --git a/archive/2024/winter/bsc_dichler/experiments/parallel_non_contiguous/main.c b/archive/2024/winter/bsc_dichler/experiments/parallel_non_contiguous/main.c new file mode 100644 index 000000000..77a2d5bbd --- /dev/null +++ b/archive/2024/winter/bsc_dichler/experiments/parallel_non_contiguous/main.c @@ -0,0 +1,188 @@ +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <time.h> +#include <pthread.h> +#include "../allocator/allocator.h" +#include "../random_selection/random_selector.h" + +pthread_barrier_t barrier; + +struct node { + struct node *next; + uint8_t arr[16 - sizeof(struct node *)]; +}; + +static_assert(sizeof(struct node) == 16, "cannot run experiment, size of node is not 16"); 
+ +struct experiment { + uint8_t *region; + struct node *start1; + struct node *start2; + size_t num_nodes; +}; + +static struct experiment node_init(size_t num_nodes, size_t seed) { + assert(num_nodes != 0); + uint8_t *region = alloc(num_nodes * 16); + struct node *node_arr = (struct node *) region; + + random_selector_setup(seed); + selector_t s = random_selector_init(num_nodes); + + uint64_t idx = random_selector_pop(&s); + struct node *start1 = &node_arr[idx]; + struct node *prev = start1; + + num_nodes -= 2; + size_t first_block = num_nodes / 2; + for (size_t i = 0; i < first_block; ++i) { + idx = random_selector_pop(&s); + struct node *n = &node_arr[idx]; + + prev->next = n; + prev = n; + } + prev->next = start1; + + + idx = random_selector_pop(&s); + struct node *start2 = &node_arr[idx]; + prev = start2; + + size_t second_block = num_nodes - first_block; + for (size_t i = 0; i < second_block; ++i) { + idx = random_selector_pop(&s); + struct node *n = &node_arr[idx]; + + prev->next = n; + prev = n; + } + prev->next = start2; + + assert(random_selector_is_empty(s)); + random_selector_deinit(s); + + return (struct experiment) { + .region = region, + .start1 = start1, + .start2 = start2, + .num_nodes = num_nodes, + }; +} + +static void node_deinit(struct experiment experiment) { + alloc_free(experiment.region, experiment.num_nodes); +} + +#ifdef READ_ONLY +extern void benchmark(struct node *start, size_t steps); +#endif + +#ifdef READ_WRITE +void benchmark(struct node *start, size_t steps, size_t write_on) { + size_t wow = sizeof(struct node); + for (size_t i = 0; i < steps; i++) { + if (i % write_on == 0) { + start->arr[1] += wow; + } + + start = start->next; + } +} +#endif + +struct run_args { + struct node *start; + uint64_t duration; +}; + +static struct node *jump_forward(struct experiment *exp, size_t jumps) { + + + struct node *start = NULL; + if (jumps % 2 == 0) { + start = exp->start1; + } else { + start = exp->start2; + } + + for (size_t i = 0; i < 
jumps; ++i) { + start = start->next; + } + + return start; +} + +static void *run(void *args) { + struct run_args *a = (struct run_args *) args; + pthread_barrier_wait(&barrier); + + struct timespec s, e; + clock_gettime(CLOCK_MONOTONIC_RAW, &s); +#ifdef READ_ONLY + benchmark(a->start, 100000000); +#endif + +#ifdef READ_WRITE + benchmark(a->start, 100000000, 1); +#endif + + clock_gettime(CLOCK_MONOTONIC_RAW, &e); + + + + a->duration = 1e9 * (e.tv_sec - s.tv_sec) + (e.tv_nsec - s.tv_nsec); + + return NULL; +} + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) + +int main(int argc, char *args[]) { + if (argc != 5) { + printf("Usage: %s <iterations> <len> <seed> <threads>\n", args[0]); + exit(EXIT_FAILURE); + } + + size_t iterations = atoll(args[1]); + size_t len = atoll(args[2]); + size_t seed = atoll(args[3]); + size_t num_threads = atoll(args[4]); + size_t jumps[num_threads]; + + for (size_t i = 0; i < num_threads; ++i) { + jumps[i] = i + 1; + } + + for (size_t i = 0; i < iterations; ++i) { + struct experiment exp = node_init(len, seed); + + pthread_barrier_init(&barrier, NULL, num_threads); + pthread_t threads[num_threads]; + struct run_args threads_args[num_threads]; + + for (size_t j = 0; j < num_threads; ++j) { + struct node *start = jump_forward(&exp, jumps[j]); + assert(start != NULL); + threads_args[j].start = start; + pthread_create(&threads[j], NULL, run, &threads_args[j]); + } + + for (size_t j = 0; j < num_threads; ++j) { + pthread_join(threads[j], NULL); + } + + uint64_t duration = 0; + for (size_t j = 0; j < num_threads; ++j) { + duration = MAX(threads_args[j].duration, duration); + } + printf("%ld;%ld;%ld\n", len, duration, num_threads); + + node_deinit(exp); + } + +} diff --git a/archive/2024/winter/bsc_dichler/experiments/random_selection/random_selector.c b/archive/2024/winter/bsc_dichler/experiments/random_selection/random_selector.c new file mode 100644 index 000000000..90d651b39 --- /dev/null +++ 
#ifndef RANDOM_SELECTOR_H
#define RANDOM_SELECTOR_H

/* <stdbool.h> is required for the `bool` return type below; without it the
 * header only compiled when the including file happened to pull it in first. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Pool of the values 0 .. size-1 from which elements are drawn at random
 * without replacement.
 */
typedef struct selector {
    uint64_t *arr; /* remaining values; owned by the selector */
    size_t size;   /* number of values still available */
} selector_t;

/* Seed the random number generator used by random_selector_pop(). */
void random_selector_setup(size_t seed);

/*
 * Create a selector holding the values 0 .. size-1.
 * Terminates the process if the backing array cannot be allocated.
 */
selector_t random_selector_init(size_t size);

/* Free the backing array of a selector. */
void random_selector_deinit(selector_t s);

/*
 * Remove one randomly chosen value from the selector and return it.
 * The selector must not be empty.
 * The return type matches the definition in random_selector.c (uint64_t);
 * the header previously declared size_t.
 */
uint64_t random_selector_pop(selector_t *s);

/* Return true once every value has been popped. */
bool random_selector_is_empty(selector_t s);

#endif // RANDOM_SELECTOR_H
000000000..78cbbb689 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/scripts/context.py @@ -0,0 +1,137 @@ +from pathlib import Path +from typing import Protocol, Self +from execution import run +from log import info + + +class Context(Protocol): + + def context(self) -> str: ... + + async def sync_to(self, experiment_root: Path) -> None: ... + + async def run(self, cmd: str | list[str]) -> None: ... + + async def sync_back(self, remote_path: Path, local_path: Path) -> None: ... + + async def __aenter__(self) -> Self: ... + + async def __aexit__(self, exc_type, exc_value, exc_tb): ... + + +class RemoteContext: + __remote_connection: str + __jump_connection: str | None + __cwd: str | None + __delete: bool + + def __init__( + self, + remote_user: str, + remote_host: str, + jump_user: str | None = None, + jump_host: str | None = None, + jump_port: str | None = None, + delete: bool = False, + ) -> None: + self.__cwd = None + self.__delete = delete + self.__remote_connection = f"{remote_user}@{remote_host}" + self.__jump_connection = None + + if any([jump_host, jump_user, jump_port]): + assert all([jump_host, jump_user]), "cannot define jump partially" + self.__jump_connection = f"ssh -J {jump_user}@{jump_host}" + if jump_port: + self.__jump_connection += f" -p {jump_port}" + + def context(self) -> str: + return f"Remote Context: {self.__cwd}" + + async def sync_to(self, experiment_root: Path) -> None: + cmd = "rsync -avz" + if self.__jump_connection: + cmd += f" -e '{self.__jump_connection}'" + + cmd += f" {experiment_root} {self.__remote_connection}:{self.__cwd}" + info(f"cmd: {cmd}") + _ = await run(cmd) + + def __connection_cmd(self) -> str: + connection_cmd = f"ssh {self.__remote_connection}" + if self.__jump_connection: + connection_cmd = f"{self.__jump_connection} {self.__remote_connection}" + + return connection_cmd + + async def run(self, cmd: str | list[str]) -> None: + assert self.__cwd, "Cannot run command without setup" + if isinstance(cmd, list): + cmd 
= " && ".join(cmd) + + cmd = cmd.strip() + assert cmd.startswith("&&") == False, "already prefixed with '&&'" + + _ = await run([f"{self.__connection_cmd()} 'cd {self.__cwd} && {cmd}'"]) + + async def sync_back(self, remote_path: Path, local_path: Path) -> None: + cmd = "rsync -avz" + if self.__jump_connection: + cmd += f" -e '{self.__jump_connection}'" + + cmd += f"{self.__remote_connection}:{self.__cwd}/{remote_path} {local_path}" + _ = await run(cmd) + + async def __aenter__(self) -> Self: + info("__aenter__") + directory = await run(f"{self.__connection_cmd()} 'mktemp -d'") + assert directory, "failed to create directory on remote system" + self.__cwd = directory + return self + + async def __aexit__(self, exc_type, exc_value, exc_tb): + assert self.__cwd, "incorrect state, cannot exit context which was not entered" + if self.__delete: + _ = await run(f"{self.__connection_cmd()} 'rm -rf {self.__cwd}'") + + +class LocalContext: + __cwd: str | None + __delete: bool + + def __init__( + self, + delete: bool = False, + ) -> None: + self.__cwd = None + self.__delete = delete + + def context(self) -> str: + return f"Local Context: {self.__cwd}" + + async def sync_to(self, experiment_root: Path) -> None: + _ = await run(f"rsync -av {experiment_root} {self.cwd}") + + async def run(self, cmd: str | list[str]) -> None: + assert self.cwd, "Cannot run command without setup" + if isinstance(cmd, list): + cmd = " && ".join(cmd) + + cmd = cmd.strip() + assert cmd.startswith("&&") == False, "already prefixed with '&&'" + + _ = await run([f"cd {self.cwd} && {cmd}"]) + + async def sync_back(self, remote_path: Path, local_path: Path) -> None: + _ = await run(f"rsync -av {self.cwd}/{remote_path} {local_path}") + + async def __aenter__(self) -> Self: + directory = await run("mktemp -d") + assert directory, "failed to create directory on remote system" + self.cwd = directory + return self + + async def __aexit__(self, exc_type, exc_value, exc_tb): + assert self.cwd, "incorrect state, 
cannot exit context which was not entered" + if self.__delete: + _ = await run(f"rm -rf {self.__cwd}") diff --git a/archive/2024/winter/bsc_dichler/scripts/execution.py b/archive/2024/winter/bsc_dichler/scripts/execution.py new file mode 100644 index 000000000..51c1db4be --- /dev/null +++ b/archive/2024/winter/bsc_dichler/scripts/execution.py @@ -0,0 +1,40 @@ +import asyncio +from logging import info, error + + +async def run(command: str | list[str], cwd: str | None = None) -> str: + if isinstance(command, list): + command = " && ".join(command) + + command = command.strip() + assert command.startswith("&&") == False, "already prefixed with '&&'" + + info(command) + cp = await asyncio.create_subprocess_shell( + cmd=command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=cwd, + ) + + async def read_stream(stream, logger): + lines = "" + while True: + line = await stream.readline() + if not line: + return lines + line = line.decode().strip() + lines += line + logger(line) + + output: tuple[str, str] = await asyncio.gather( + read_stream(cp.stdout, info), + read_stream(cp.stderr, error), + ) + + _ = await cp.wait() + if cp.returncode == 1: + error("Failed.") + exit(1) + + return output[0] diff --git a/archive/2024/winter/bsc_dichler/scripts/experiments.py b/archive/2024/winter/bsc_dichler/scripts/experiments.py new file mode 100644 index 000000000..0077dbad2 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/scripts/experiments.py @@ -0,0 +1,50 @@ +from typing import Callable, Literal +import matplotlib as mpl +import matplotlib.pyplot as plt +from pathlib import Path + +from plot import parallel_non_contiguous +from plot import cas +from plot import non_contiguous +from plot import contiguous +from plot import malloc +from plot import contiguous_tagging + +Experiments = Literal[ + "cas", + "contiguous", + "non_contiguous", + "contiguous_tagging", + "malloc", + "parallel_non_contiguous", +] +PlottingFunction = Callable[[Path, str], None] + 
def log(prefix: str, msg: str, intend: int = 0):
    """Print every non-empty line of ``msg`` with a prefix and indentation.

    Args:
        prefix: Text placed at the start of every printed line.
        msg: Message to print; it may span several lines.
        intend: Number of tab characters inserted between prefix and text.
    """
    intend_offset = "\t" * intend
    for line in msg.split("\n"):
        if not line:
            continue
        # Print the current line. The original printed the complete message
        # once per line, duplicating multi-line messages.
        print(prefix + intend_offset + line, flush=True)
def run_remote(args):
    """Run the selected experiment through a ``RemoteContext``.

    Connection settings come from the command line and fall back to the
    ``MTE_REMOTE_*`` environment variables. The process exits with status 1
    when the remote user or host is missing, or when the jump host is only
    partially configured.

    Args:
        args: Parsed command-line arguments of the ``remote`` sub-command.
    """
    remote_user = args.remote_user or os.getenv("MTE_REMOTE_USER")
    if not remote_user:
        error("--remote-user not provided and MTE_REMOTE_USER not set.")
        sys.exit(1)

    # The host was read from args.remote_user, so --remote-host was ignored
    # and the user name was used as the host whenever --remote-user was set.
    remote_host = args.remote_host or os.getenv("MTE_REMOTE_HOST")
    if not remote_host:
        error("--remote-host not provided and MTE_REMOTE_HOST not set.")
        sys.exit(1)

    remote_jump_user = args.remote_jump_user or os.getenv("MTE_REMOTE_JUMP_USER")
    remote_jump_host = args.remote_jump_host or os.getenv("MTE_REMOTE_JUMP_HOST")
    remote_jump_port = args.remote_jump_port or os.getenv("MTE_REMOTE_JUMP_PORT")

    # A jump host needs at least user and host; a port alone is not enough.
    jump_requested = any([remote_jump_user, remote_jump_host, remote_jump_port])
    jump_complete = all([remote_jump_user, remote_jump_host])
    if jump_requested and not jump_complete:
        error(
            (
                "must provide both "
                "--remote-jump-user (or set MTE_REMOTE_JUMP_USER) and "
                "--remote-jump-host (or set MTE_REMOTE_JUMP_HOST) "
                "or set neither"
            )
        )
        sys.exit(1)

    ctx = RemoteContext(
        remote_user=remote_user,
        remote_host=remote_host,
        jump_user=remote_jump_user,
        jump_host=remote_jump_host,
        jump_port=remote_jump_port,
        delete=args.cleanup,
    )
    asyncio.run(
        run(
            ctx=ctx,
            local_result_path=args.result,
            experiment_base=args.base,
            experiment=args.experiment,
        )
    )
remote.add_argument( + "--remote-jump-user", help="Remote jump user (or set MTE_REMOTE_JUMP_USER)" + ) + remote.add_argument( + "--remote-jump-host", help="Remote jump host (or set MTE_REMOTE_JUMP_HOST)" + ) + remote.add_argument( + "--remote-jump-port", help="Remote jump port (or set MTE_REMOTE_JUMP_PORT)" + ) + + args = parser.parse_args() + args.func(args) diff --git a/archive/2024/winter/bsc_dichler/scripts/plot/cas.py b/archive/2024/winter/bsc_dichler/scripts/plot/cas.py new file mode 100644 index 000000000..e6f3a08e1 --- /dev/null +++ b/archive/2024/winter/bsc_dichler/scripts/plot/cas.py @@ -0,0 +1,92 @@ +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + + +def plot(output_root: Path, format): + output_root = output_root / Path("cas") + csv_file1 = output_root / Path("run.csv") + csv_file2 = output_root / Path("run_mte.csv") + + df1 = pd.read_csv(csv_file1, sep=";") + df2 = pd.read_csv(csv_file2, sep=";") + + df1["duration"] = df1["duration"] / 1_000_000_000 + df2["duration"] = df2["duration"] / 1_000_000_000 + + grouped1 = df1.groupby("cores")["duration"].agg(["mean", "std"]).reset_index() + grouped2 = df2.groupby("cores")["duration"].agg(["mean", "std"]).reset_index() + + merged = pd.merge(grouped1, grouped2, on="cores", suffixes=("_1", "_2")) + + _, ax1 = plt.subplots(figsize=(10, 6)) + + library = ["1 Thread", "2 Threads", "3 Threads", "4 Threads"] + x = np.arange(len(library)) + bar_width = 0.35 + + ax1.bar( + x - bar_width / 2, + merged["mean_1"], + yerr=merged["std_1"], + width=bar_width, + capsize=5, + label="MTE disabled", + color="#fc9272", + edgecolor="black", + linewidth=2, + ) + ax1.bar( + x + bar_width / 2, + merged["mean_2"], + yerr=merged["std_2"], + width=bar_width, + capsize=5, + label="MTE enabled", + color="#a6bddb", + edgecolor="black", + linewidth=2, + ) + + for i, row in merged.iterrows(): + x_start = i - 0.05 + + y_start = row["mean_2"] + y_end = row["mean_1"] + + if (y_start - 
row["std_2"]) <= (y_end + row["std_1"]): + continue + + ax1.annotate( + f"", + xy=(x_start - 0.25 / 2, y_start), + xytext=(x_start - 0.25 / 2, y_end + 0.05 * y_end), + arrowprops=dict(arrowstyle="->", color="red", lw=2), + color="red", + ha="center", + ) + + percentage = y_start / y_end + ax1.text( + x_start - 0.25 / 2, + y_start + ((y_end - y_start) / 2), + f"{percentage:.2f}×", + color="red", + fontweight="bold", + bbox=dict(facecolor="white", alpha=1.0, edgecolor="none"), + ha="center", + ) + + plt.xticks(x, library) + ax1.set_ylabel("Time (s)") + ax1.set_xlabel(r"#Threads") + + ax1.spines["top"].set_visible(False) + ax1.spines["right"].set_visible(False) + ax1.set_title("Lower is better ↓", color="navy") + ax1.legend(loc="upper left") + ax1.set_ylim(ymin=0) + + output = output_root / Path(f"result.{format}") + plt.savefig(output, format=format) diff --git a/archive/2024/winter/bsc_dichler/scripts/plot/contiguous.py b/archive/2024/winter/bsc_dichler/scripts/plot/contiguous.py new file mode 100644 index 000000000..24981138e --- /dev/null +++ b/archive/2024/winter/bsc_dichler/scripts/plot/contiguous.py @@ -0,0 +1,72 @@ +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt + + +def plot(output_root: Path, format): + output_root = output_root / Path("contiguous") + csv_file1 = output_root / Path("load16.csv") + csv_file2 = output_root / Path("load16_mte.csv") + + df1 = pd.read_csv(csv_file1, sep=";") + df2 = pd.read_csv(csv_file2, sep=";") + + df1["kb"] = df1["len"] * 64 // 1024 + df2["kb"] = df2["len"] * 64 // 1024 + + grouped1 = df1.groupby("kb")["duration"].agg(["mean", "std"]).reset_index() + grouped2 = df2.groupby("kb")["duration"].agg(["mean", "std"]).reset_index() + + merged = pd.merge(grouped1, grouped2, on="kb", suffixes=("_1", "_2")) + + _, ax1 = plt.subplots(figsize=(10, 6)) + + ax1.errorbar( + merged["kb"], + merged["mean_1"], + yerr=merged["std_1"], + fmt="o-", + color="#a6bddb", + capsize=5, + label="MTE disabled", + ) + 
def plot(output_root: Path, format):
    """Plot the mean time per operation of the tagging micro-benchmarks.

    Reads one ``;``-separated CSV per operation from
    ``<output_root>/contiguous_tagging`` (columns ``size``, ``duration`` and
    ``ops``), draws one bar per operation with the standard deviation as
    error bar, and saves the figure as ``result.<format>`` in that directory.

    Args:
        output_root: Directory containing the per-experiment result folders.
        format: File format handed to ``savefig`` and used as file suffix.
    """
    output_root = output_root / Path("contiguous_tagging")
    # (legend label, CSV file) per measured operation, in plotting order
    series = [
        ("malloc", "tag_malloc.csv"),
        ("stg", "tag_stg.csv"),
        ("st2g", "tag_st2g.csv"),
        ("ldg", "ldg.csv"),
        ("ldr", "load.csv"),
        ("str", "store.csv"),
    ]
    labels = [label for label, _ in series]

    dfs = []
    for _, file_name in series:
        df = pd.read_csv(output_root / Path(file_name), sep=";")
        df["duration_per_op"] = df["duration"] / df["ops"]
        g = df.groupby("size")["duration_per_op"].agg(["mean", "std"]).reset_index()
        dfs.append(g)

    _, ax1 = plt.subplots(figsize=(10, 6))

    # A single, unlabeled group on the x axis: all bars sit side by side.
    library = [""]
    colors = [
        "#a6bddb",
        "#fc9272",
        "#99d8c9",
        "#c994c7",
        "#fdbb84",
        "#efedf5",
        "skyblue",
    ]
    bar_width = 0.8 / len(series)
    x = np.arange(len(library))

    for i, (label, df) in enumerate(zip(labels, dfs)):
        # horizontal shift that centres the bars of all series around x
        offset = (i - (len(series) - 1) / 2) * bar_width
        mean_values = df["mean"].values

        # NOTE(review): x has exactly one slot, so a CSV with more than one
        # distinct size raises IndexError here - confirm that each file is
        # meant to hold a single size.
        for j, mean in enumerate(mean_values):
            ax1.text(
                x[j] + offset,
                mean,
                f"{mean:.3f}",
                ha="center",
                va="bottom",
                color="black",
            )

        ax1.bar(
            x + offset,
            mean_values,
            yerr=df["std"],
            width=bar_width,
            capsize=5,
            label=label,
            color=colors[i % len(colors)],
            edgecolor="black",
            linewidth=2,
        )

    ax1.set_ylabel("Time (ns)")
    ax1.set_xlabel("Single instruction")
    ax1.legend(labels=labels, loc="upper right")
    plt.title("Lower is better ↓", color="navy")
    ax1.spines["top"].set_visible(False)
    ax1.spines["right"].set_visible(False)
    plt.xticks(x, library, ha="right")
    ax1.set_ylim(ymin=0)
    plt.tight_layout()

    output = output_root / Path(f"result.{format}")
    plt.savefig(output, format=format)
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Timing columns of the malloc benchmark CSV files.
_DURATION_COLUMNS = ["duration_allocation", "duration_deallocation"]


def _summarise(csv_file):
    """Load one benchmark CSV and return mean/std of the scaled durations per size."""
    frame = pd.read_csv(csv_file, sep=";")
    for column in _DURATION_COLUMNS:
        # Scale each duration by the row's `allocation` value and by 1000.
        frame[column] = frame[column] / frame["allocation"] / 1000
    return frame.groupby("size")[_DURATION_COLUMNS].agg(["mean", "std"]).reset_index()


def run(csv_file1, csv_file2, duration_type, output, format):
    """Draw a grouped bar chart comparing two malloc benchmark runs.

    Args:
        csv_file1: Semicolon-separated CSV plotted as "MTE disabled".
        csv_file2: Semicolon-separated CSV plotted as "MTE enabled".
        duration_type: Timing column to plot, ``"duration_allocation"`` or
            ``"duration_deallocation"``.
        output: Path of the figure file to write.
        format: File format passed to ``savefig``.
    """
    baseline = _summarise(csv_file1)[duration_type]
    tagged = _summarise(csv_file2)[duration_type]

    _, axis = plt.subplots(figsize=(12, 6))

    size_labels = [
        "16 Bytes",
        "128 Bytes",
        "256 Bytes",
        "1024 Bytes",
        "2048 Bytes",
        "4096 Bytes",
        "8192 Bytes",
    ]
    positions = np.arange(len(size_labels))
    bar_width = 0.35
    half_width = bar_width / 2
    shared_style = {
        "width": bar_width,
        "capsize": 5,
        "edgecolor": "black",
        "linewidth": 2,
    }

    baseline_bars = axis.bar(
        positions - half_width,
        baseline["mean"],
        yerr=baseline["std"],
        label="MTE disabled",
        color="#a6bddb",
        **shared_style,
    )
    tagged_bars = axis.bar(
        positions + half_width,
        tagged["mean"],
        yerr=tagged["std"],
        label="MTE enabled",
        color="#fc9272",
        **shared_style,
    )

    for index, (base_bar, tag_bar) in enumerate(zip(baseline_bars, tagged_bars)):
        tag_height = tag_bar.get_height()
        base_height = base_bar.get_height()
        arrow_x = positions[index] - half_width

        # Arrow over the baseline bar, from its top to the tagged bar's height.
        axis.annotate(
            "",
            xy=(arrow_x, tag_height),
            xytext=(arrow_x, base_height),
            arrowprops={"arrowstyle": "->", "color": "red", "lw": 2},
            color="red",
            ha="center",
        )

        # Slowdown factor, written next to the middle of the arrow.
        ratio = tag_height / base_height
        axis.text(
            index - half_width - 0.04,
            base_height + ((tag_height - base_height) / 2) - 0.08,
            f"{ratio:.2f}×",
            color="red",
            fontweight="bold",
            bbox={"facecolor": "white", "alpha": 1.0, "edgecolor": "none"},
            ha="center",
        )

    plt.ylabel("Time (µs)")
    axis.set_xlabel("Batch size")
    axis.legend(loc="upper left")
    plt.title("Lower is better ↓", color="navy")

    for side in ("top", "right"):
        axis.spines[side].set_visible(False)
    plt.tight_layout()

    plt.xticks(positions, size_labels)
    plt.tight_layout()
    plt.savefig(output, format=format)


def plot(output_root: Path, format):
    """Write the allocation and deallocation charts for the malloc benchmark.

    Both charts compare ``malloc.csv`` with ``malloc_mte.csv`` from
    ``output_root / "malloc"`` and are saved in that same directory.

    Args:
        output_root: Directory that contains the ``malloc`` sub-directory.
        format: File format and extension of the written figures.
    """
    output_root = output_root / Path("malloc")
    baseline_csv = output_root / Path("malloc.csv")
    mte_csv = output_root / Path("malloc_mte.csv")

    charts = (
        ("duration_allocation", Path(f"result-alloc.{format}")),
        ("duration_deallocation", Path(f"result-dealloc.{format}")),
    )
    for duration_type, file_name in charts:
        run(baseline_csv, mte_csv, duration_type, output_root / file_name, format)
linewidth=2.5, + label="MTE enabled", + ) + ax1.errorbar( + disabled_grouped["kb"], + disabled_grouped["mean"], + yerr=disabled_grouped["std"], + capsize=5, + color="#fc9272", + linewidth=2.5, + label="MTE disabled", + ) + + ax1.set_xscale("log", base=2) + ax1.set_yscale("log", base=2) + + tick_positions = enabled_grouped["kb"].iloc[::2] + tick_labels = [ + f"{v} KiB" if v < 1024 else f"{v // 1024} MiB" for v in tick_positions + ] + ax1.set_xticks(tick_positions) + ax1.set_xticklabels(tick_labels, rotation=45, ha="right") + + plt.xticks() + plt.yticks() + + enabled_grouped["lower"] = enabled_grouped["mean"] - enabled_grouped["std"] + disabled_grouped["lower"] = disabled_grouped["mean"] - disabled_grouped["std"] + enabled_grouped["upper"] = enabled_grouped["mean"] + enabled_grouped["std"] + disabled_grouped["upper"] = disabled_grouped["mean"] + disabled_grouped["std"] + + overlap = (disabled_grouped["lower"] <= enabled_grouped["upper"]) & ( + disabled_grouped["upper"] >= enabled_grouped["lower"] + ) + + enabled_grouped["percentage_diff"] = ( + (enabled_grouped["mean"] - disabled_grouped["mean"]) / enabled_grouped["mean"] + ) * 100 + enabled_grouped.loc[overlap, "percentage_diff"] = 0 + + ax2 = ax1.twinx() + ax2.plot( + enabled_grouped["kb"], + enabled_grouped["percentage_diff"], + "r--", + label="% Difference", + ) + ax2.set_ylabel("Difference (%)", fontsize=12) + ax2.set_ylim( + min(enabled_grouped["percentage_diff"]), + max(enabled_grouped["percentage_diff"]) + 10, + ) + + h1, l1 = ax1.get_legend_handles_labels() + h2, l2 = ax2.get_legend_handles_labels() + + ax1.legend(loc="upper left", handles=h1 + h2, labels=l1 + l2) + + ax1.set_ylabel("Time (ns - logarithmic scale)") + ax1.set_xlabel("Memory size (logarithmic scale)") + + plt.title("Lower is better ↓", color="navy") + + ax1.grid(True, which="both", linestyle=":", linewidth=0.5) + plt.tight_layout() + + output = output_root / Path(f"result.{format}") + plt.savefig(output, format=format) diff --git 
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd


def _size_label(kb):
    """Return the x-axis tick label for a memory size given in KiB."""
    if kb < 1:
        # Sub-KiB sizes are all shown as "1 KiB".
        return "1 KiB"
    if kb < 1024:
        # `kb` is a float; ":g" drops the trailing ".0" ("2 KiB", not "2.0 KiB").
        return f"{kb:g} KiB"
    return f"{int(kb // 1024)} MiB"


def run(csv_files: list[Path], output: Path, format):
    """Plot mean duration against memory size for every thread count.

    Each CSV file is semicolon-separated and must provide the columns
    ``duration``, ``len`` and ``threads``. The first file is drawn with solid
    lines and labeled "MTE disabled", the second with dashed lines and
    labeled "MTE enabled".

    Args:
        csv_files: Benchmark CSV files, baseline first.
        output: Path of the figure file to write.
        format: File format passed to ``savefig``.

    Raises:
        ValueError: If ``csv_files`` is empty.
    """
    if not csv_files:
        raise ValueError("csv_files must contain at least one benchmark CSV")

    _, ax1 = plt.subplots(figsize=(10, 6))
    labels = ["MTE disabled", "MTE enabled"]
    line_styles = ["-", "--"]
    thread_colors = {1: "tomato", 2: "#56B4E9", 3: "#009E73", 4: "#E69F00"}

    data = []
    unique_threads = set()
    tick_positions = None
    for file in csv_files:
        df = pd.read_csv(file, sep=";")
        # NOTE(review): floor division truncates every sample before it is
        # averaged, and the axis label below says "ns" - confirm the unit.
        df["duration"] = df["duration"] // 1_000
        grouped = (
            df.groupby(["len", "threads"])["duration"]
            .agg(["mean", "std"])
            .reset_index()
        )
        grouped["kb"] = (grouped["len"] * 16) / 1024
        data.append(grouped)
        unique_threads.update(grouped["threads"].unique())
        # Ticks follow the sizes of the last file that was read.
        tick_positions = grouped["kb"].unique()

    handles = []
    labels_legend = []
    for thread in sorted(unique_threads):
        thread_word = "1 Thread" if thread == 1 else f"{thread} Threads"
        for idx, grouped in enumerate(data):
            subset = grouped[grouped["threads"] == thread]
            if subset.empty:
                continue

            line = ax1.errorbar(
                subset["kb"],
                subset["mean"],
                yerr=subset["std"],
                fmt="o",
                linestyle=line_styles[idx],
                capsize=5,
                color=thread_colors.get(thread, "black"),
            )
            handles.append(line[0])
            labels_legend.append(f"{thread_word} - {labels[idx]}")

    ax1.legend(
        handles=handles,
        labels=labels_legend,
        ncol=2,
        loc="upper left",
        fancybox=True,
        shadow=False,
    )

    ax1.set_xscale("log", base=2)
    ax1.set_yscale("log", base=2)
    ax1.grid(True, which="both", linestyle="--", linewidth=0.5)

    # Label every second size only, to keep the axis readable.
    xticks_filtered = list(tick_positions[::2])
    tick_labels = [_size_label(kb) for kb in xticks_filtered]

    ax1.set_xticks(xticks_filtered)
    ax1.set_xticklabels(tick_labels, rotation=45, ha="right")
    ax1.set_ylabel("Time (ns - logarithmic scale)")
    ax1.set_xlabel("Memory size (logarithmic scale)")

    ax1.spines["top"].set_visible(False)
    ax1.spines["right"].set_visible(False)

    plt.title("Lower is better ↓", color="navy")
    plt.tight_layout()
    plt.savefig(output, format=format)


def plot(output_root: Path, format):
    """Write the write and read charts for the parallel non-contiguous run.

    Input CSV files are read from, and figures written to,
    ``output_root / "parallel_non_contiguous"``.

    Args:
        output_root: Directory that contains the ``parallel_non_contiguous``
            sub-directory.
        format: File format and extension of the written figures.
    """
    output_root = output_root / Path("parallel_non_contiguous")

    run(
        [
            output_root / Path("write.csv"),
            output_root / Path("write_mte.csv"),
        ],
        output_root / Path(f"result-write.{format}"),
        format,
    )
    run(
        [
            output_root / Path("load.csv"),
            output_root / Path("load_mte.csv"),
        ],
        output_root / Path(f"result-read.{format}"),
        format,
    )