summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorChristian Krinitsin <mail@krinitsin.com>2025-06-01 21:19:28 +0200
committerChristian Krinitsin <mail@krinitsin.com>2025-06-01 21:19:28 +0200
commit05ed9b00104b3da2e9cb0d541b5c5bbceb027fde (patch)
treed47bd9721b78db6131d56780e7230cf69412eaa6
parentcddf7dfa5a6e92a9057e7f6afae5d9b970585e6f (diff)
downloadqemu-analysis-05ed9b00104b3da2e9cb0d541b5c5bbceb027fde.tar.gz
qemu-analysis-05ed9b00104b3da2e9cb0d541b5c5bbceb027fde.zip
adjust classifier
-rwxr-xr-xclassification/main.py15
-rw-r--r--classification/output.py4
-rw-r--r--classification/semantic_issues/gitlab_semantic_addsubps23
-rw-r--r--classification/semantic_issues/gitlab_semantic_adox36
-rw-r--r--classification/semantic_issues/gitlab_semantic_bextr25
-rw-r--r--classification/semantic_issues/gitlab_semantic_blsi20
-rw-r--r--classification/semantic_issues/gitlab_semantic_blsmsk27
-rw-r--r--classification/semantic_issues/gitlab_semantic_bzhi38
8 files changed, 184 insertions, 4 deletions
diff --git a/classification/main.py b/classification/main.py
index 3394a3fba..e38460acb 100755
--- a/classification/main.py
+++ b/classification/main.py
@@ -11,7 +11,9 @@ parser.add_argument('-d', '--deepseek', action='store_true')
 parser.add_argument('-t', '--test', action='store_true')
 args = parser.parse_args()
 
-categories = ['semantic', 'other', 'mistranslation', 'instruction']
+positive_categories = ['semantic', 'mistranslation', 'instruction', 'assembly'] # to add: register
+negative_categories = ['other', 'boot', 'network', 'KVM', 'vnc', 'graphic', 'device', 'socket'] # to add: performance
+categories = positive_categories + negative_categories
 
 def main():
     if args.deepseek:
@@ -24,12 +26,21 @@ def main():
         exit()
 
     bugs = list_files_recursive("../mailinglist/output_mailinglist")
+    bugs = bugs + list_files_recursive("./semantic_issues")
     for bug in bugs:
+        print(f"Processing {bug}")
         with open(bug, "r") as file:
             text = file.read()
 
         result = classifier(text, categories, multi_label=True)
-        output(text, result['labels'], result['scores'], path.basename(bug))
+        category = result['labels'][0]
+
+        for label, score in zip(result["labels"], result["scores"]):
+            if label in negative_categories and score >= 0.92:
+                category = label
+                break
+
+        output(text, category, result['labels'], result['scores'], path.basename(bug))
 
 if __name__ == "__main__":
     main()
diff --git a/classification/output.py b/classification/output.py
index df64dbccd..971c3830e 100644
--- a/classification/output.py
+++ b/classification/output.py
@@ -1,7 +1,7 @@
 from os import path, makedirs
 
-def output(text : str, labels : list, scores : list, identifier : str):
-    file_path = f"output/{labels[0]}/{identifier}"
+def output(text : str, category : str, labels : list, scores : list, identifier : str):
+    file_path = f"output/{category}/{identifier}"
     makedirs(path.dirname(file_path), exist_ok = True)
 
     with open(file_path, "w") as file:
diff --git a/classification/semantic_issues/gitlab_semantic_addsubps b/classification/semantic_issues/gitlab_semantic_addsubps
new file mode 100644
index 000000000..60438ff08
--- /dev/null
+++ b/classification/semantic_issues/gitlab_semantic_addsubps
@@ -0,0 +1,23 @@
+x86 SSE/SSE2/SSE3 instruction semantic bugs with NaN
+
+Description of problem
+The result of SSE/SSE2/SSE3 instructions with NaN is different from the CPU. From Intel manual Volume 1 Appendix D.4.2.2, they defined the behavior of such instructions with NaN. But I think QEMU did not implement this semantic exactly because the byte result is different.
+
+Steps to reproduce
+
+Compile this code
+
+void main() {
+    asm("mov rax, 0x000000007fffffff; push rax; mov rax, 0x00000000ffffffff; push rax; movdqu XMM1, [rsp];");
+    asm("mov rax, 0x2e711de7aa46af1a; push rax; mov rax, 0x7fffffff7fffffff; push rax; movdqu XMM2, [rsp];");
+    asm("addsubps xmm1, xmm2");
+}
+
+Execute and compare the result with the CPU. This problem happens with other SSE/SSE2/SSE3 instructions specified in the manual, Volume 1 Appendix D.4.2.2.
+
+CPU xmm1[3] = 0xffffffff
+
+QEMU xmm1[3] = 0x7fffffff
+
+Additional information
+This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/semantic_issues/gitlab_semantic_adox b/classification/semantic_issues/gitlab_semantic_adox
new file mode 100644
index 000000000..9f4471c9a
--- /dev/null
+++ b/classification/semantic_issues/gitlab_semantic_adox
@@ -0,0 +1,36 @@
+x86 ADOX and ADCX semantic bug
+Description of problem
+The results of the ADOX and ADCX instructions are different from the CPU. The value of one of the EFLAGS bits is different.
+
+Steps to reproduce
+
+Compile this code
+
+
+void main() {
+    asm("push 512; popfq;");
+    asm("mov rax, 0xffffffff84fdbf24");
+    asm("mov rbx, 0xb197d26043bec15d");
+    asm("adox eax, ebx");
+}
+
+
+
+Execute and compare the result with the CPU. This problem happens with ADCX, too (with CF).
+
+CPU
+
+OF = 0
+
+
+QEMU
+
+OF = 1
+
+
+
+
+
+
+Additional information
+This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/semantic_issues/gitlab_semantic_bextr b/classification/semantic_issues/gitlab_semantic_bextr
new file mode 100644
index 000000000..dabe16acf
--- /dev/null
+++ b/classification/semantic_issues/gitlab_semantic_bextr
@@ -0,0 +1,25 @@
+x86 BEXTR semantic bug
+Description of problem
+The result of instruction BEXTR is different from the CPU. The value of the destination register is different. I think QEMU does not consider the operand size limit.
+
+Steps to reproduce
+
+Compile this code
+
+void main() {
+    asm("mov rax, 0x17b3693f77fb6e9");
+    asm("mov rbx, 0x8f635a775ad3b9b4");
+    asm("mov rcx, 0xb717b75da9983018");
+    asm("bextr eax, ebx, ecx");
+}
+
+Execute and compare the result with the CPU.
+
+CPU
+RAX = 0x5a
+
+QEMU
+RAX = 0x635a775a
+
+Additional information
+This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/semantic_issues/gitlab_semantic_blsi b/classification/semantic_issues/gitlab_semantic_blsi
new file mode 100644
index 000000000..92ff92b0e
--- /dev/null
+++ b/classification/semantic_issues/gitlab_semantic_blsi
@@ -0,0 +1,20 @@
+x86 BLSI and BLSR semantic bug
+Description of problem
+The result of instruction BLSI and BLSR is different from the CPU. The value of CF is different.
+
+Steps to reproduce
+
+Compile this code
+
+
+void main() {
+    asm("blsi rax, rbx");
+}
+
+
+
+Execute and compare the result with the CPU. The value of CF is exactly the opposite. This problem happens with BLSR, too.
+
+
+Additional information
+This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/semantic_issues/gitlab_semantic_blsmsk b/classification/semantic_issues/gitlab_semantic_blsmsk
new file mode 100644
index 000000000..b950faa21
--- /dev/null
+++ b/classification/semantic_issues/gitlab_semantic_blsmsk
@@ -0,0 +1,27 @@
+x86 BLSMSK semantic bug
+Description of problem
+The result of instruction BLSMSK is different from the CPU. The value of CF is different.
+
+Steps to reproduce
+
+Compile this code
+
+void main() {
+    asm("mov rax, 0x65b2e276ad27c67");
+    asm("mov rbx, 0x62f34955226b2b5d");
+    asm("blsmsk eax, ebx");
+}
+
+Execute and compare the result with the CPU.
+
+CPU
+
+CF = 0
+
+
+QEMU
+
+CF = 1
+
+Additional information
+This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/semantic_issues/gitlab_semantic_bzhi b/classification/semantic_issues/gitlab_semantic_bzhi
new file mode 100644
index 000000000..b86da08c7
--- /dev/null
+++ b/classification/semantic_issues/gitlab_semantic_bzhi
@@ -0,0 +1,38 @@
+x86 BZHI semantic bug
+Description of problem
+The result of instruction BZHI is different from the CPU. The value of destination register and SF of EFLAGS are different.
+
+Steps to reproduce
+
+Compile this code
+
+
+void main() {
+    asm("mov rax, 0xb1aa9da2fe33fe3");
+    asm("mov rbx, 0x80000000ffffffff");
+    asm("mov rcx, 0xf3fce8829b99a5c6");
+    asm("bzhi rax, rbx, rcx");
+}
+
+
+
+Execute and compare the result with the CPU.
+
+CPU
+
+RAX = 0x80000000ffffffff
+SF = 1
+
+
+QEMU
+
+RAX = 0xffffffff
+SF = 0
+
+
+
+
+
+
+Additional information
+This bug is discovered by research conducted by KAIST SoftSec.