summary refs log tree commit diff stats
path: root/classification
diff options
context:
space:
mode:
Diffstat (limited to 'classification')
-rwxr-xr-x classification/files.py 16
-rwxr-xr-x classification/main.py 38
-rw-r--r-- classification/output.py 12
-rw-r--r-- classification/requirements.txt 1
-rwxr-xr-x classification/test.py 18
-rw-r--r-- classification/test_input/README.md 74
-rw-r--r-- classification/test_input/gitlab_semantic_addsubps 23
-rw-r--r-- classification/test_input/gitlab_semantic_adox 36
-rw-r--r-- classification/test_input/gitlab_semantic_bextr 25
-rw-r--r-- classification/test_input/gitlab_semantic_blsi 20
-rw-r--r-- classification/test_input/gitlab_semantic_blsmsk 27
-rw-r--r-- classification/test_input/gitlab_semantic_bzhi 38
-rw-r--r-- classification/test_input/mail_other_1 120
-rw-r--r-- classification/test_input/mail_other_2 83
-rw-r--r-- classification/test_input/mail_other_3 24
-rw-r--r-- classification/test_input/mail_semantic_1 21
-rw-r--r-- classification/test_input/mail_semantic_2 15
-rw-r--r-- classification/test_input/mail_semantic_vmovdqu 49
18 files changed, 22 insertions, 618 deletions
diff --git a/classification/files.py b/classification/files.py
deleted file mode 100755
index 65efda6ff..000000000
--- a/classification/files.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import os
-
-def list_files_recursive(path='.'):
-    result = []
-    for entry in os.listdir(path):
-        full_path = os.path.join(path, entry)
-        if os.path.isdir(full_path):
-            result = result + list_files_recursive(full_path)
-        else:
-            result.append(full_path)
-    return result
-
-if __name__ == "__main__":
-    directory_path = '../gitlab/issues_text'
-    arr = list_files_recursive(directory_path)
-    print(arr)
diff --git a/classification/main.py b/classification/main.py
index e38460acb..6786cc2fe 100755
--- a/classification/main.py
+++ b/classification/main.py
@@ -1,29 +1,33 @@
 from transformers import pipeline
-from argparse import ArgumentParser
 from os import path
 
-from test import test
-from files import list_files_recursive
-from output import output
-
-parser = ArgumentParser(prog='classifier')
-parser.add_argument('-d', '--deepseek', action='store_true')
-parser.add_argument('-t', '--test', action='store_true')
-args = parser.parse_args()
-
 positive_categories = ['semantic', 'mistranslation', 'instruction', 'assembly'] # to add: register
 negative_categories = ['other', 'boot', 'network', 'KVM', 'vnc', 'graphic', 'device', 'socket'] # to add: performance
 categories = positive_categories + negative_categories
 
-def main():
-    if args.deepseek:
-        print("deepseek currently not supported")
-        exit()
+def list_files_recursive(path):
+    """Return the paths of all non-directory entries below ``path``.
+
+    Directories are descended into recursively; every other entry is
+    reported as ``path`` joined with the entry name. Entries appear in the
+    order ``os.listdir`` yields them.
+    """
+    # The imports at the top of this file bind only ``path`` (from ``os``),
+    # not ``os`` itself, and the parameter shadows that ``path`` in here, so
+    # import the module locally and go through ``os.path`` explicitly.
+    import os
+
+    result = []
+    for entry in os.listdir(path):
+        full_path = os.path.join(path, entry)
+        if os.path.isdir(full_path):
+            # Extend in place instead of rebuilding the list per directory.
+            result.extend(list_files_recursive(full_path))
+        else:
+            result.append(full_path)
+    return result
+
+def output(text : str, category : str, labels : list, scores : list, identifier : str):
+    """Write one classification result to ``output/<category>/<identifier>``.
+
+    The file contains one ``label: score`` line per (label, score) pair, with
+    the score formatted to three decimals, then an empty line, then ``text``.
+    Missing parent directories are created; the file is opened in ``"w"``
+    mode, so existing content is replaced.
+    """
+    # ``makedirs`` is not among the names imported at the top of this file
+    # (only ``path`` is), so bring it into scope here.
+    from os import makedirs
+
+    file_path = f"output/{category}/{identifier}"
+    makedirs(path.dirname(file_path), exist_ok = True)
+
+    with open(file_path, "w") as file:
+        for label, score in zip(labels, scores):
+            file.write(f"{label}: {score:.3f}\n")
+
+        file.write("\n")
+        file.write(text)
+
+def main():
     classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-    if args.test:
-        test(classifier, categories)
-        exit()
 
     bugs = list_files_recursive("../mailinglist/output_mailinglist")
     bugs = bugs + list_files_recursive("./semantic_issues")
diff --git a/classification/output.py b/classification/output.py
deleted file mode 100644
index 971c3830e..000000000
--- a/classification/output.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from os import path, makedirs
-
-def output(text : str, category : str, labels : list, scores : list, identifier : str):
-    file_path = f"output/{category}/{identifier}"
-    makedirs(path.dirname(file_path), exist_ok = True)
-
-    with open(file_path, "w") as file:
-        for label, score in zip(labels, scores):
-            file.write(f"{label}: {score:.3f}\n")
-
-        file.write("\n")
-        file.write(text)
diff --git a/classification/requirements.txt b/classification/requirements.txt
new file mode 100644
index 000000000..38502406f
--- /dev/null
+++ b/classification/requirements.txt
@@ -0,0 +1 @@
+transformers[torch]
diff --git a/classification/test.py b/classification/test.py
deleted file mode 100755
index bcd6b4392..000000000
--- a/classification/test.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from os import listdir, path
-
-directory : str = "./test_input"
-
-def test(classifier, categories):
-    for name in listdir(directory):
-        if name == "README.md":
-            continue
-
-        with open(path.join(directory, name), "r") as file:
-            text = file.read()
-
-        result = classifier(text, categories, multi_label=True)
-
-        print(name)
-        for label, score in zip(result["labels"], result["scores"]):
-            print(f"{label}: {score:.3f}")
-        print("")
diff --git a/classification/test_input/README.md b/classification/test_input/README.md
deleted file mode 100644
index d2dad58c3..000000000
--- a/classification/test_input/README.md
+++ /dev/null
@@ -1,74 +0,0 @@
-For the categories 'semantic', 'instruction', 'mistranslation' and 'other' and the texts from the files, we get the following scores:
-```
-gitlab_semantic_addsubps
-semantic: 0.974
-instruction: 0.931
-other: 0.732
-mistranslation: 0.299
-
-mail_semantic_vmovdqu
-mistranslation: 0.648
-instruction: 0.622
-other: 0.589
-semantic: 0.463
-
-mail_semantic_1
-instruction: 0.915
-other: 0.684
-semantic: 0.670
-mistranslation: 0.198
-
-mail_other_2
-other: 0.952
-instruction: 0.939
-semantic: 0.879
-mistranslation: 0.862
-
-mail_other_3
-other: 0.919
-instruction: 0.680
-mistranslation: 0.679
-semantic: 0.662
-
-mail_semantic_2
-semantic: 0.997
-instruction: 0.974
-mistranslation: 0.637
-other: 0.177
-
-gitlab_semantic_bzhi
-semantic: 0.920
-instruction: 0.623
-mistranslation: 0.171
-other: 0.064
-
-gitlab_semantic_bextr
-semantic: 0.993
-instruction: 0.944
-mistranslation: 0.337
-other: 0.099
-
-gitlab_semantic_blsi
-semantic: 0.983
-instruction: 0.964
-other: 0.609
-mistranslation: 0.606
-
-gitlab_semantic_adox
-semantic: 0.990
-instruction: 0.944
-mistranslation: 0.452
-other: 0.286
-
-gitlab_semantic_blsmsk
-semantic: 0.987
-instruction: 0.962
-mistranslation: 0.603
-other: 0.269
-
-mail_other_1
-other: 0.927
-semantic: 0.916
-instruction: 0.910
-mistranslation: 0.870
-```
diff --git a/classification/test_input/gitlab_semantic_addsubps b/classification/test_input/gitlab_semantic_addsubps
deleted file mode 100644
index 60438ff08..000000000
--- a/classification/test_input/gitlab_semantic_addsubps
+++ /dev/null
@@ -1,23 +0,0 @@
-x86 SSE/SSE2/SSE3 instruction semantic bugs with NaN
-
-Description of problem
-The result of SSE/SSE2/SSE3 instructions with NaN is different from the CPU. From Intel manual Volume 1 Appendix D.4.2.2, they defined the behavior of such instructions with NaN. But I think QEMU did not implement this semantic exactly because the byte result is different.
-
-Steps to reproduce
-
-Compile this code
-
-void main() {
-    asm("mov rax, 0x000000007fffffff; push rax; mov rax, 0x00000000ffffffff; push rax; movdqu XMM1, [rsp];");
-    asm("mov rax, 0x2e711de7aa46af1a; push rax; mov rax, 0x7fffffff7fffffff; push rax; movdqu XMM2, [rsp];");
-    asm("addsubps xmm1, xmm2");
-}
-
-Execute and compare the result with the CPU. This problem happens with other SSE/SSE2/SSE3 instructions specified in the manual, Volume 1 Appendix D.4.2.2.
-
-CPU xmm1[3] = 0xffffffff
-
-QEMU xmm1[3] = 0x7fffffff
-
-Additional information
-This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/test_input/gitlab_semantic_adox b/classification/test_input/gitlab_semantic_adox
deleted file mode 100644
index 9f4471c9a..000000000
--- a/classification/test_input/gitlab_semantic_adox
+++ /dev/null
@@ -1,36 +0,0 @@
-x86 ADOX and ADCX semantic bug
-Description of problem
-The result of instruction ADOX and ADCX are different from the CPU. The value of one of EFLAGS is different.
-
-Steps to reproduce
-
-Compile this code
-
-
-void main() {
-    asm("push 512; popfq;");
-    asm("mov rax, 0xffffffff84fdbf24");
-    asm("mov rbx, 0xb197d26043bec15d");
-    asm("adox eax, ebx");
-}
-
-
-
-Execute and compare the result with the CPU. This problem happens with ADCX, too (with CF).
-
-CPU
-
-OF = 0
-
-
-QEMU
-
-OF = 1
-
-
-
-
-
-
-Additional information
-This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/test_input/gitlab_semantic_bextr b/classification/test_input/gitlab_semantic_bextr
deleted file mode 100644
index dabe16acf..000000000
--- a/classification/test_input/gitlab_semantic_bextr
+++ /dev/null
@@ -1,25 +0,0 @@
-x86 BEXTR semantic bug
-Description of problem
-The result of instruction BEXTR is different with from the CPU. The value of destination register is different. I think QEMU does not consider the operand size limit.
-
-Steps to reproduce
-
-Compile this code
-
-void main() {
-    asm("mov rax, 0x17b3693f77fb6e9");
-    asm("mov rbx, 0x8f635a775ad3b9b4");
-    asm("mov rcx, 0xb717b75da9983018");
-    asm("bextr eax, ebx, ecx");
-}
-
-Execute and compare the result with the CPU.
-
-CPU
-RAX = 0x5a
-
-QEMU
-RAX = 0x635a775a
-
-Additional information
-This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/test_input/gitlab_semantic_blsi b/classification/test_input/gitlab_semantic_blsi
deleted file mode 100644
index 92ff92b0e..000000000
--- a/classification/test_input/gitlab_semantic_blsi
+++ /dev/null
@@ -1,20 +0,0 @@
-x86 BLSI and BLSR semantic bug
-Description of problem
-The result of instruction BLSI and BLSR is different from the CPU. The value of CF is different.
-
-Steps to reproduce
-
-Compile this code
-
-
-void main() {
-    asm("blsi rax, rbx");
-}
-
-
-
-Execute and compare the result with the CPU. The value of CF is exactly the opposite. This problem happens with BLSR, too.
-
-
-Additional information
-This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/test_input/gitlab_semantic_blsmsk b/classification/test_input/gitlab_semantic_blsmsk
deleted file mode 100644
index b950faa21..000000000
--- a/classification/test_input/gitlab_semantic_blsmsk
+++ /dev/null
@@ -1,27 +0,0 @@
-x86 BLSMSK semantic bug
-Description of problem
-The result of instruction BLSMSK is different with from the CPU. The value of CF is different.
-
-Steps to reproduce
-
-Compile this code
-
-void main() {
-    asm("mov rax, 0x65b2e276ad27c67");
-    asm("mov rbx, 0x62f34955226b2b5d");
-    asm("blsmsk eax, ebx");
-}
-
-Execute and compare the result with the CPU.
-
-CPU
-
-CF = 0
-
-
-QEMU
-
-CF = 1
-
-Additional information
-This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/test_input/gitlab_semantic_bzhi b/classification/test_input/gitlab_semantic_bzhi
deleted file mode 100644
index b86da08c7..000000000
--- a/classification/test_input/gitlab_semantic_bzhi
+++ /dev/null
@@ -1,38 +0,0 @@
-x86 BZHI semantic bug
-Description of problem
-The result of instruction BZHI is different from the CPU. The value of destination register and SF of EFLAGS are different.
-
-Steps to reproduce
-
-Compile this code
-
-
-void main() {
-    asm("mov rax, 0xb1aa9da2fe33fe3");
-    asm("mov rbx, 0x80000000ffffffff");
-    asm("mov rcx, 0xf3fce8829b99a5c6");
-    asm("bzhi rax, rbx, rcx");
-}
-
-
-
-Execute and compare the result with the CPU.
-
-CPU
-
-RAX = 0x0x80000000ffffffff
-SF = 1
-
-
-QEMU
-
-RAX = 0xffffffff
-SF = 0
-
-
-
-
-
-
-Additional information
-This bug is discovered by research conducted by KAIST SoftSec.
diff --git a/classification/test_input/mail_other_1 b/classification/test_input/mail_other_1
deleted file mode 100644
index f4a855325..000000000
--- a/classification/test_input/mail_other_1
+++ /dev/null
@@ -1,120 +0,0 @@
-[Bug] x86 EFLAGS refresh is not happening correctly
-
-Hello,
-I'm posting this here instead of opening an issue as it is not clear to me if this is a bug or not.
-The issue is located in function "cpu_compute_eflags" in target/i386/cpu.h
-(
-https://gitlab.com/qemu-project/qemu/-/blob/master/target/i386/cpu.h#L2071
-)
-This function is exectued in an out of cpu loop context.
-It is used to synchronize TCG internal eflags registers (CC_OP, CC_SRC,  etc...) with the CPU eflags field upon loop exit.
-It does:
-    eflags
-|=
-cpu_cc_compute_all
-(
-env
-,
-CC_OP
-)
-|
-(
-env
-->
-df
-&
-DF_MASK
-);
-Shouldn't it be:
-    Â
-eflags
-=
-cpu_cc_compute_all
-(
-env
-,
-CC_OP
-)
-|
-(
-env
-->
-df
-&
-DF_MASK
-);
-as eflags is entirely reevaluated by "cpu_cc_compute_all" ?
-Thanks,
-Kind regards,
-Stevie
-
-On 05/08/21 11:51, Stevie Lavern wrote:
-Shouldn't it be:
-eflags = cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
-as eflags is entirely reevaluated by "cpu_cc_compute_all" ?
-No, both are wrong.  env->eflags contains flags other than the
-arithmetic flags (OF/SF/ZF/AF/PF/CF) and those have to be preserved.
-The right code is in helper_read_eflags.  You can move it into
-cpu_compute_eflags, and make helper_read_eflags use it.
-Paolo
-
-On 05/08/21 13:24, Paolo Bonzini wrote:
-On 05/08/21 11:51, Stevie Lavern wrote:
-Shouldn't it be:
-eflags = cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
-as eflags is entirely reevaluated by "cpu_cc_compute_all" ?
-No, both are wrong.  env->eflags contains flags other than the
-arithmetic flags (OF/SF/ZF/AF/PF/CF) and those have to be preserved.
-The right code is in helper_read_eflags.  You can move it into
-cpu_compute_eflags, and make helper_read_eflags use it.
-Ah, actually the two are really the same, the TF/VM bits do not apply to
-cpu_compute_eflags so it's correct.
-What seems wrong is migration of the EFLAGS register.  There should be
-code in cpu_pre_save and cpu_post_load to special-case it and setup
-CC_DST/CC_OP as done in cpu_load_eflags.
-Also, cpu_load_eflags should assert that update_mask does not include
-any of the arithmetic flags.
-Paolo
-
-Thank for your reply!
-It's still a bit cryptic for me.
-I think i need to precise that I'm using a x86_64 custom user-mode,base on linux user-mode, that i'm developing (unfortunately i cannot share the code) with modifications in the translation loop (I've added cpu loop exits on specific instructions which are not control flow instructions).
-If my understanding is correct, in the user-mode case 'cpu_compute_eflags' is called directly by 'x86_cpu_exec_exit' with the intention of synchronizing the CPU env->eflags field with its real value (represented by the CC_* fields).
-I'm not sure how 'cpu_pre_save' and 'cpu_post_load' are involved in this case.

-As you said in your first email, 'helper_read_eflags' seems to be the correct way to go.
-Here is some detail about my current experimentation/understanding of this "issue":
-With the current implementationÂ
-        Â
-eflags |= cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
-if I exit the loop with a CC_OP different from CC_OP_EFLAGS, I found that the resulting env->eflags may be invalid.
-In my test case, the loop was exiting with eflags = 0x44 and CC_OP = CC_OP_SUBL with CC_DST=1, CC_SRC=258, CC_SRC2=0.
-While 'cpu_cc_compute_all' computes the correct flags (ZF:0, PF:0), the result will still be 0x44 (ZF:1, PF:1) due to the 'or' operation, thus leading to an incorrect eflags value loaded into the CPU env.Â
-In my case, after loop reentry, it led to an invalid branch to be taken.
-Thanks for your time!
-Regards
-Stevie

-On Thu, Aug 5, 2021 at 1:33 PM Paolo Bonzini <
-pbonzini@redhat.com
-> wrote:
-On 05/08/21 13:24, Paolo Bonzini wrote:
-> On 05/08/21 11:51, Stevie Lavern wrote:
->>
->> Shouldn't it be:
->> eflags = cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
->> as eflags is entirely reevaluated by "cpu_cc_compute_all" ?
->
-> No, both are wrong.  env->eflags contains flags other than the
-> arithmetic flags (OF/SF/ZF/AF/PF/CF) and those have to be preserved.
->
-> The right code is in helper_read_eflags.  You can move it into
-> cpu_compute_eflags, and make helper_read_eflags use it.
-Ah, actually the two are really the same, the TF/VM bits do not apply to
-cpu_compute_eflags so it's correct.
-What seems wrong is migration of the EFLAGS register.  There should be
-code in cpu_pre_save and cpu_post_load to special-case it and setup
-CC_DST/CC_OP as done in cpu_load_eflags.
-Also, cpu_load_eflags should assert that update_mask does not include
-any of the arithmetic flags.
-Paolo
diff --git a/classification/test_input/mail_other_2 b/classification/test_input/mail_other_2
deleted file mode 100644
index df6aceba1..000000000
--- a/classification/test_input/mail_other_2
+++ /dev/null
@@ -1,83 +0,0 @@
-qemu-aarch64-static segfaults running ldconfig.real (amd64 host)
-[ Impact ]
-
- * QEMU crashes when running (emulating) ldconfig in a Ubuntu 22.04 arm64 guest
-
- * This affects the qemu-user-static 1:8.2.2+ds-0ubuntu1 package on Ubuntu 24.04+, running on a amd64 host.
-
- * When running docker containers with Ubuntu 22.04 in them, emulating arm64 with qemu-aarch64-static, invocations of ldconfig (actually ldconfig.real) segfault, leading to problems when loading shared libraries.
-
-[ Test Plan ]
-
- * Reproducer is very easy:
-
-$ sudo snap install docker
-docker 27.5.1 from Canonical** installed
-$ docker run -ti --platform linux/arm64/v8 ubuntu:22.04
-Unable to find image 'ubuntu:22.04' locally
-22.04: Pulling from library/ubuntu
-0d1c17d4e593: Pull complete
-Digest: sha256:ed1544e454989078f5dec1bfdabd8c5cc9c48e0705d07b678ab6ae3fb61952d2
-Status: Downloaded newer image for ubuntu:22.04
-
-# Execute ldconfig.real inside the arm64 guest.
-# This should not crash after the fix!
-root@ad80af5378dc:/# /sbin/ldconfig.real
-qemu: uncaught target signal 11 (Segmentation fault) - core dumped
-Segmentation fault (core dumped)
-
-[ Where problems could occur ]
-
- * This changes the alignment of sections in the ELF binary via QEMUs elfloader, if something goes wrong with this change, it could lead to all kind of crashes (segfault) of any emulated binaries.
-
-[ Other Info ]
-
- * Upstream bug: https://gitlab.com/qemu-project/qemu/-/issues/1913
- * Upstream fix: https://gitlab.com/qemu-project/qemu/-/commit/4b7b20a3
-   - Fix dependency (needed for QEMU < 9.20): https://gitlab.com/qemu-project/qemu/-/commit/c81d1faf
-
---- original bug report ---
-
-This affects the qemu-user-static 1:8.2.2+ds-0ubuntu1 package on Ubuntu 24.04, running on a amd64 host.
-
-When running docker containers with Ubuntu 22.04 in them, emulating arm64 with qemu-aarch64-static, invocations of ldconfig (actually ldconfig.real) segfault. For example:
-
-$ docker run -ti --platform linux/arm64/v8 ubuntu:22.04
-root@8861ff640a1c:/# /sbin/ldconfig.real
-Segmentation fault
-
-If you copy the ldconfig.real binary to the host, and run it directly via qemu-aarch64-static:
-
-$ gdb --args qemu-aarch64-static ./ldconfig.real
-GNU gdb (Ubuntu 15.0.50.20240403-0ubuntu1) 15.0.50.20240403-git
-Copyright (C) 2024 Free Software Foundation, Inc.
-License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
-This is free software: you are free to change and redistribute it.
-There is NO WARRANTY, to the extent permitted by law.
-Type "show copying" and "show warranty" for details.
-This GDB was configured as "x86_64-linux-gnu".
-Type "show configuration" for configuration details.
-For bug reporting instructions, please see:
-<https://www.gnu.org/software/gdb/bugs/>.
-Find the GDB manual and other documentation resources online at:
-    <http://www.gnu.org/software/gdb/documentation/>.
-
-For help, type "help".
-Type "apropos word" to search for commands related to "word"...
-Reading symbols from qemu-aarch64-static...
-Reading symbols from /home/dim/.cache/debuginfod_client/86579812b213be0964189499f62f176bea817bf2/debuginfo...
-(gdb) r
-Starting program: /usr/bin/qemu-aarch64-static ./ldconfig.real
-[Thread debugging using libthread_db enabled]
-Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
-[New Thread 0x7ffff76006c0 (LWP 28378)]
-
-Thread 1 "qemu-aarch64-st" received signal SIGSEGV, Segmentation fault.
-0x00007fffe801645b in ?? ()
-(gdb) disassemble
-No function contains program counter for selected frame.
-
-It looks like this is a known qemu regression after v8.1.1:
-https://gitlab.com/qemu-project/qemu/-/issues/1913
-
-Downgrading the package to qemu-user-static_8.0.4+dfsg-1ubuntu3_amd64.deb fixes the segfault.
diff --git a/classification/test_input/mail_other_3 b/classification/test_input/mail_other_3
deleted file mode 100644
index 504ddc488..000000000
--- a/classification/test_input/mail_other_3
+++ /dev/null
@@ -1,24 +0,0 @@
-
-This fix is fine for me...at least from SDM, HTT depends on topology and
-it should exist when user sets "-smp 4".
-
-I haven't found any other thread :-).
-
-By the way, just curious, in what cases do you need to disbale the HT
-flag? "-smp 4" means 4 cores with 1 thread per core, and is it not
-enough?
-
-As for the “-ht” behavior, I'm also unsure whether this should be fixed
-or not - one possible consideration is whether “-ht” would be useful.
-
-This fix is fine for me...at least from SDM, HTT depends on topology and
-it should exist when user sets "-smp 4".
-
-I haven't found any other thread :-).
-
-By the way, just curious, in what cases do you need to disbale the HT
-flag? "-smp 4" means 4 cores with 1 thread per core, and is it not
-enough?
-
-As for the “-ht” behavior, I'm also unsure whether this should be fixed
-or not - one possible consideration is whether “-ht” would be useful.
diff --git a/classification/test_input/mail_semantic_1 b/classification/test_input/mail_semantic_1
deleted file mode 100644
index af6a2480d..000000000
--- a/classification/test_input/mail_semantic_1
+++ /dev/null
@@ -1,21 +0,0 @@
-AArch64: ISV is set to 1 in ESR_EL2 when taking a data abort with post-indexed instructions
-
-I think that I have a Qemu bug in my hands, but, I could still be missing something. Consider the following instruction:
-0x0000000000000000:  C3 44 00 B8    str   w3, [x6], #4
-
-notice the last #4, I think this is what we would call a post-indexed instruction (falls into the category of instructions with writeback). As I understand it, those instructions should not have ISV=1 in ESR_EL2 when faulting.
-
-Here is the relevant part of the manual:
-
-For other faults reported in ESR_EL2, ISV is 0 except for the following stage 2 aborts:
-• AArch64 loads and stores of a single general-purpose register (including the register specified with 0b11111, including those with Acquire/Release semantics, but excluding Load Exclusive or Store Exclusive and excluding those with writeback).
-
-
-However, I can see that Qemu sets ISV to 1 here. The ARM hardware that I tested gave me a value of ISV=0 for similar instructions.
-
-Another example of instruction: 0x00000000000002f8:  01 1C 40 38    ldrb  w1, [x0, #1]!"""
-reproduce = """1. Run some hypervisor in EL2
-2. Create a guest running at EL1 that executes one of the mentioned instructions (and make the instruction fault by writing to some unmapped page in SLP)
-3. Observe the value of ESR_EL2 on data abort
-
-Unfortunately, I cannot provide an image to reproduce this (the software is not open-source). But, I would be happy to help test a patch.
diff --git a/classification/test_input/mail_semantic_2 b/classification/test_input/mail_semantic_2
deleted file mode 100644
index 4c78171d2..000000000
--- a/classification/test_input/mail_semantic_2
+++ /dev/null
@@ -1,15 +0,0 @@
-x86 BLSMSK semantic bug
-description = """The result of instruction BLSMSK is different with from the CPU. The value of CF is different."""
-reproduce = """1. Compile this code
-void main() {
-    asm("mov rax, 0x65b2e276ad27c67");
-    asm("mov rbx, 0x62f34955226b2b5d");
-    asm("blsmsk eax, ebx");
-}
-
-2. Execute and compare the result with the CPU.
-    - CPU
-        - CF = 0
-    - QEMU
-        - CF = 1"""
-additional = """This bug is discovered by research conducted by KAIST SoftSec."""
diff --git a/classification/test_input/mail_semantic_vmovdqu b/classification/test_input/mail_semantic_vmovdqu
deleted file mode 100644
index 49b1da500..000000000
--- a/classification/test_input/mail_semantic_vmovdqu
+++ /dev/null
@@ -1,49 +0,0 @@
-AVX instruction VMOVDQU implementation error for YMM registers
-Bug Description
-Hi,
-
-Tested with Qemu 4.2.0, and with git version bddff6f6787c916b0e9d63ef9e4d442114257739.
-
-The x86 AVX instruction VMOVDQU doesn't work properly with YMM registers (32 bytes).
-It works with XMM registers (16 bytes) though.
-
-See the attached test case `ymm.c`: when copying from memory-to-ymm0 and then back from ymm0-to-memory using VMOVDQU, Qemu only copies the first 16 of the total 32 bytes.
-
-```
-user@ubuntu ~/Qemu % gcc -o ymm ymm.c -Wall -Wextra -Werror
-
-user@ubuntu ~/Qemu % ./ymm
-00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
-
-user@ubuntu ~/Qemu % ./x86_64-linux-user/qemu-x86_64 -cpu max ymm
-00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-```
-
-This seems to be because in `translate.c > gen_sse()`, the case handling the VMOVDQU instruction calls `gen_ldo_env_A0` which always performs a 16 bytes copy using two 8 bytes load and store operations (with `tcg_gen_qemu_ld_i64` and `tcg_gen_st_i64`).
-
-Instead, the `gen_ldo_env_A0` function should generate a copy with a size corresponding to the used register.
-
-```
-static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-                    target_ulong pc_start, int rex_r)
-{
-        [...]
-        case 0x26f: /* movdqu xmm, ea */
-            if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
-                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
-            } else {
-        [...]
-```
-
-```
-static inline void gen_ldo_env_A0(DisasContext *s, int offset)
-{
-    int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
-}
-```