summary refs log tree commit diff stats
path: root/results/classifier
diff options
context:
space:
mode:
authorChristian Krinitsin <mail@krinitsin.com>2025-07-08 16:45:54 +0200
committerChristian Krinitsin <mail@krinitsin.com>2025-07-08 16:45:54 +0200
commit35f097a31e1c58892a69178b84ddba658efe9c8f (patch)
tree2da7d86cd4e3b7dd811746b1206bd5bbb90d59a7 /results/classifier
parent5aa276efcbd67f4300ca1a7f809c6e00aadb03da (diff)
downloadqemu-analysis-35f097a31e1c58892a69178b84ddba658efe9c8f.tar.gz
qemu-analysis-35f097a31e1c58892a69178b84ddba658efe9c8f.zip
manually review misclassifications
Diffstat (limited to 'results/classifier')
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/analysis.csv3
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/categories.csv7
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785)0
-rw-r--r--results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829)0
29 files changed, 6 insertions, 4 deletions
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv b/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv
new file mode 100644
index 000000000..a62bdf522
--- /dev/null
+++ b/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv
@@ -0,0 +1,3 @@
+category, count
+instruction, 116
+runtime, 3
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/categories.csv b/results/classifier/no-thinking-deepseek-r1:70b/categories.csv
index 489a55f47..1de6fd2e8 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/categories.csv
+++ b/results/classifier/no-thinking-deepseek-r1:70b/categories.csv
@@ -1,5 +1,4 @@
 category, count
-syscall, 131
-runtime, 202
-manual-review, 27
-instruction, 191
+instruction, 200
+runtime, 209
+syscall, 142
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022
index a536c9c2b..a536c9c2b 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344
index 984f0de0b..984f0de0b 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833
index e8dd75d38..e8dd75d38 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954
index b388f888a..b388f888a 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626
index ab83b68c4..ab83b68c4 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327
index 6ca895587..6ca895587 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248
index b66e29475..b66e29475 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374
index db22962cb..db22962cb 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495
index 7a4345abf..7a4345abf 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503
index 2e52922e5..2e52922e5 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593
index c3f980de4..c3f980de4 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738
index b0a9b82ec..b0a9b82ec 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782
index f0050a201..f0050a201 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895
index 2062b6a12..2062b6a12 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910
index 4665dcb25..4665dcb25 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448
index c1d7d8f6f..c1d7d8f6f 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929
index 934062a21..934062a21 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545
index d16af1407..d16af1407 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913
index 2227f1714..2227f1714 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830
index e40ef8844..e40ef8844 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461
index e28c5d305..e28c5d305 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193
index 45c535e95..45c535e95 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553
index 037b118cd..037b118cd 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560
index cc6828b27..cc6828b27 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306
index b8c46fe2e..b8c46fe2e 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785
index fc7c50b98..fc7c50b98 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829
index 80e762b15..80e762b15 100644
--- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829
+++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829