summary refs log tree commit diff stats
path: root/results/classifier/no-thinking-deepseek-r1:70b
diff options
context:
space:
mode:
authorChristian Krinitsin <mail@krinitsin.com>2025-07-08 16:45:54 +0200
committerChristian Krinitsin <mail@krinitsin.com>2025-07-08 16:45:54 +0200
commit35f097a31e1c58892a69178b84ddba658efe9c8f (patch)
tree2da7d86cd4e3b7dd811746b1206bd5bbb90d59a7 /results/classifier/no-thinking-deepseek-r1:70b
parent5aa276efcbd67f4300ca1a7f809c6e00aadb03da (diff)
downloademulator-bug-study-35f097a31e1c58892a69178b84ddba658efe9c8f.tar.gz
emulator-bug-study-35f097a31e1c58892a69178b84ddba658efe9c8f.zip
manually review misclassifications
Diffstat (limited to 'results/classifier/no-thinking-deepseek-r1:70b')
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/analysis.csv3
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/categories.csv7
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829)0
29 files changed, 6 insertions, 4 deletions
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv b/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv
new file mode 100644
index 00000000..a62bdf52
--- /dev/null
+++ b/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv
@@ -0,0 +1,3 @@
+category, count
+instruction, 116
+runtime, 3
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/categories.csv b/results/classifier/no-thinking-deepseek-r1:70b/categories.csv
index 489a55f4..1de6fd2e 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/categories.csv
+++ b/results/classifier/no-thinking-deepseek-r1:70b/categories.csv
@@ -1,5 +1,4 @@
 category, count
-syscall, 131
-runtime, 202
-manual-review, 27
-instruction, 191
+instruction, 200
+runtime, 209
+syscall, 142
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022
index a536c9c2..a536c9c2 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344
index 984f0de0..984f0de0 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833
index e8dd75d3..e8dd75d3 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954
index b388f888..b388f888 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626
index ab83b68c..ab83b68c 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327
index 6ca89558..6ca89558 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248
index b66e2947..b66e2947 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374
index db22962c..db22962c 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495
index 7a4345ab..7a4345ab 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503
index 2e52922e..2e52922e 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593
index c3f980de..c3f980de 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738
index b0a9b82e..b0a9b82e 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782
index f0050a20..f0050a20 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895
index 2062b6a1..2062b6a1 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910
index 4665dcb2..4665dcb2 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448
index c1d7d8f6..c1d7d8f6 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929
index 934062a2..934062a2 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545
index d16af140..d16af140 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913
index 2227f171..2227f171 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830
index e40ef884..e40ef884 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461
index e28c5d30..e28c5d30 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193
index 45c535e9..45c535e9 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553
index 037b118c..037b118c 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560
index cc6828b2..cc6828b2 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306
index b8c46fe2..b8c46fe2 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785
index fc7c50b9..fc7c50b9 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829
index 80e762b1..80e762b1 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829