diff options
| author | Christian Krinitsin <mail@krinitsin.com> | 2025-07-08 16:45:54 +0200 |
|---|---|---|
| committer | Christian Krinitsin <mail@krinitsin.com> | 2025-07-08 16:45:54 +0200 |
| commit | 35f097a31e1c58892a69178b84ddba658efe9c8f (patch) | |
| tree | 2da7d86cd4e3b7dd811746b1206bd5bbb90d59a7 | |
| parent | 5aa276efcbd67f4300ca1a7f809c6e00aadb03da (diff) | |
| download | emulator-bug-study-35f097a31e1c58892a69178b84ddba658efe9c8f.tar.gz emulator-bug-study-35f097a31e1c58892a69178b84ddba658efe9c8f.zip | |
manually review misclassifications
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/analysis.csv | 3 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/categories.csv | 7 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785) | 0 | ||||
| -rw-r--r-- | results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829 (renamed from results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829) | 0 |
29 files changed, 6 insertions, 4 deletions
diff --git a/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv b/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv new file mode 100644 index 00000000..a62bdf52 --- /dev/null +++ b/results/classifier/no-thinking-deepseek-r1:70b/analysis.csv @@ -0,0 +1,3 @@ +category, count +instruction, 116 +runtime, 3 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/categories.csv b/results/classifier/no-thinking-deepseek-r1:70b/categories.csv index 489a55f4..1de6fd2e 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/categories.csv +++ b/results/classifier/no-thinking-deepseek-r1:70b/categories.csv @@ -1,5 +1,4 @@ category, count -syscall, 131 -runtime, 202 -manual-review, 27 -instruction, 191 +instruction, 200 +runtime, 209 +syscall, 142 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022 index a536c9c2..a536c9c2 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1022 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1022 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344 index 984f0de0..984f0de0 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1824344 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1824344 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833 index e8dd75d3..e8dd75d3 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1833 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1833 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954 index b388f888..b388f888 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1898954 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1898954 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626 index ab83b68c..ab83b68c 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1908626 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1908626 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327 index 6ca89558..6ca89558 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1915327 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1915327 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248 index b66e2947..b66e2947 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1967248 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/1967248 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374 index db22962c..db22962c 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2374 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2374 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495 index 7a4345ab..7a4345ab 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2495 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/instruction/2495 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503 index 2e52922e..2e52922e 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1550503 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1550503 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593 index c3f980de..c3f980de 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1593 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1593 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738 index b0a9b82e..b0a9b82e 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1854738 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1854738 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782 index f0050a20..f0050a20 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1869782 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1869782 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895 index 2062b6a1..2062b6a1 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1895 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1895 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910 index 4665dcb2..4665dcb2 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1910 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/1910 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448 index c1d7d8f6..c1d7d8f6 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2448 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/runtime/2448 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929 index 934062a2..934062a2 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1617929 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1617929 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545 index d16af140..d16af140 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1738545 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1738545 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913 index 2227f171..2227f171 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1805913 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1805913 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830 index e40ef884..e40ef884 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1830 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1830 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461 index e28c5d30..e28c5d30 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1858461 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1858461 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193 index 45c535e9..45c535e9 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/1906193 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/1906193 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553 index 037b118c..037b118c 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2553 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2553 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560 index cc6828b2..cc6828b2 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/2560 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/2560 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306 index b8c46fe2..b8c46fe2 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/306 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/306 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785 index fc7c50b9..fc7c50b9 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/739785 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/739785 diff --git a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829 b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829 index 80e762b1..80e762b1 100644 --- a/results/classifier/no-thinking-deepseek-r1:70b/reasoning/manual-review/829 +++ b/results/classifier/no-thinking-deepseek-r1:70b/reasoning/syscall/829 |