diff options
| -rwxr-xr-x | classification/main.py | 15 | ||||
| -rw-r--r-- | classification/output.py | 4 | ||||
| -rw-r--r-- | classification/semantic_issues/gitlab_semantic_addsubps | 23 | ||||
| -rw-r--r-- | classification/semantic_issues/gitlab_semantic_adox | 36 | ||||
| -rw-r--r-- | classification/semantic_issues/gitlab_semantic_bextr | 25 | ||||
| -rw-r--r-- | classification/semantic_issues/gitlab_semantic_blsi | 20 | ||||
| -rw-r--r-- | classification/semantic_issues/gitlab_semantic_blsmsk | 27 | ||||
| -rw-r--r-- | classification/semantic_issues/gitlab_semantic_bzhi | 38 |
8 files changed, 184 insertions, 4 deletions
diff --git a/classification/main.py b/classification/main.py index 3394a3fb..e38460ac 100755 --- a/classification/main.py +++ b/classification/main.py @@ -11,7 +11,9 @@ parser.add_argument('-d', '--deepseek', action='store_true') parser.add_argument('-t', '--test', action='store_true') args = parser.parse_args() -categories = ['semantic', 'other', 'mistranslation', 'instruction'] +positive_categories = ['semantic', 'mistranslation', 'instruction', 'assembly'] # to add: register +negative_categories = ['other', 'boot', 'network', 'KVM', 'vnc', 'graphic', 'device', 'socket'] # to add: performance +categories = positive_categories + negative_categories def main(): if args.deepseek: @@ -24,12 +26,21 @@ def main(): exit() bugs = list_files_recursive("../mailinglist/output_mailinglist") + bugs = bugs + list_files_recursive("./semantic_issues") for bug in bugs: + print(f"Processing {bug}") with open(bug, "r") as file: text = file.read() result = classifier(text, categories, multi_label=True) - output(text, result['labels'], result['scores'], path.basename(bug)) + category = result['labels'][0] + + for label, score in zip(result["labels"], result["scores"]): + if label in negative_categories and score >= 0.92: + category = label + break + + output(text, category, result['labels'], result['scores'], path.basename(bug)) if __name__ == "__main__": main() diff --git a/classification/output.py b/classification/output.py index df64dbcc..971c3830 100644 --- a/classification/output.py +++ b/classification/output.py @@ -1,7 +1,7 @@ from os import path, makedirs -def output(text : str, labels : list, scores : list, identifier : str): - file_path = f"output/{labels[0]}/{identifier}" +def output(text : str, category : str, labels : list, scores : list, identifier : str): + file_path = f"output/{category}/{identifier}" makedirs(path.dirname(file_path), exist_ok = True) with open(file_path, "w") as file: diff --git a/classification/semantic_issues/gitlab_semantic_addsubps 
b/classification/semantic_issues/gitlab_semantic_addsubps new file mode 100644 index 00000000..60438ff0 --- /dev/null +++ b/classification/semantic_issues/gitlab_semantic_addsubps @@ -0,0 +1,23 @@ +x86 SSE/SSE2/SSE3 instruction semantic bugs with NaN + +Description of problem +The result of SSE/SSE2/SSE3 instructions with NaN is different from the CPU. From Intel manual Volume 1 Appendix D.4.2.2, they defined the behavior of such instructions with NaN. But I think QEMU did not implement this semantic exactly because the byte result is different. + +Steps to reproduce + +Compile this code + +void main() { + asm("mov rax, 0x000000007fffffff; push rax; mov rax, 0x00000000ffffffff; push rax; movdqu XMM1, [rsp];"); + asm("mov rax, 0x2e711de7aa46af1a; push rax; mov rax, 0x7fffffff7fffffff; push rax; movdqu XMM2, [rsp];"); + asm("addsubps xmm1, xmm2"); +} + +Execute and compare the result with the CPU. This problem happens with other SSE/SSE2/SSE3 instructions specified in the manual, Volume 1 Appendix D.4.2.2. + +CPU xmm1[3] = 0xffffffff + +QEMU xmm1[3] = 0x7fffffff + +Additional information +This bug is discovered by research conducted by KAIST SoftSec. diff --git a/classification/semantic_issues/gitlab_semantic_adox b/classification/semantic_issues/gitlab_semantic_adox new file mode 100644 index 00000000..9f4471c9 --- /dev/null +++ b/classification/semantic_issues/gitlab_semantic_adox @@ -0,0 +1,36 @@ +x86 ADOX and ADCX semantic bug +Description of problem +The result of instruction ADOX and ADCX are different from the CPU. The value of one of EFLAGS is different. + +Steps to reproduce + +Compile this code + + +void main() { + asm("push 512; popfq;"); + asm("mov rax, 0xffffffff84fdbf24"); + asm("mov rbx, 0xb197d26043bec15d"); + asm("adox eax, ebx"); +} + + + +Execute and compare the result with the CPU. This problem happens with ADCX, too (with CF). 
+ +CPU + +OF = 0 + + +QEMU + +OF = 1 + + + + + + +Additional information +This bug is discovered by research conducted by KAIST SoftSec. diff --git a/classification/semantic_issues/gitlab_semantic_bextr b/classification/semantic_issues/gitlab_semantic_bextr new file mode 100644 index 00000000..dabe16ac --- /dev/null +++ b/classification/semantic_issues/gitlab_semantic_bextr @@ -0,0 +1,25 @@ +x86 BEXTR semantic bug +Description of problem +The result of instruction BEXTR is different from the CPU. The value of destination register is different. I think QEMU does not consider the operand size limit. + +Steps to reproduce + +Compile this code + +void main() { + asm("mov rax, 0x17b3693f77fb6e9"); + asm("mov rbx, 0x8f635a775ad3b9b4"); + asm("mov rcx, 0xb717b75da9983018"); + asm("bextr eax, ebx, ecx"); +} + +Execute and compare the result with the CPU. + +CPU +RAX = 0x5a + +QEMU +RAX = 0x635a775a + +Additional information +This bug is discovered by research conducted by KAIST SoftSec. diff --git a/classification/semantic_issues/gitlab_semantic_blsi b/classification/semantic_issues/gitlab_semantic_blsi new file mode 100644 index 00000000..92ff92b0 --- /dev/null +++ b/classification/semantic_issues/gitlab_semantic_blsi @@ -0,0 +1,20 @@ +x86 BLSI and BLSR semantic bug +Description of problem +The result of instruction BLSI and BLSR is different from the CPU. The value of CF is different. + +Steps to reproduce + +Compile this code + + +void main() { + asm("blsi rax, rbx"); +} + + + +Execute and compare the result with the CPU. The value of CF is exactly the opposite. This problem happens with BLSR, too. + + +Additional information +This bug is discovered by research conducted by KAIST SoftSec. 
diff --git a/classification/semantic_issues/gitlab_semantic_blsmsk b/classification/semantic_issues/gitlab_semantic_blsmsk new file mode 100644 index 00000000..b950faa2 --- /dev/null +++ b/classification/semantic_issues/gitlab_semantic_blsmsk @@ -0,0 +1,27 @@ +x86 BLSMSK semantic bug +Description of problem +The result of instruction BLSMSK is different from the CPU. The value of CF is different. + +Steps to reproduce + +Compile this code + +void main() { + asm("mov rax, 0x65b2e276ad27c67"); + asm("mov rbx, 0x62f34955226b2b5d"); + asm("blsmsk eax, ebx"); +} + +Execute and compare the result with the CPU. + +CPU + +CF = 0 + + +QEMU + +CF = 1 + +Additional information +This bug is discovered by research conducted by KAIST SoftSec. diff --git a/classification/semantic_issues/gitlab_semantic_bzhi b/classification/semantic_issues/gitlab_semantic_bzhi new file mode 100644 index 00000000..b86da08c --- /dev/null +++ b/classification/semantic_issues/gitlab_semantic_bzhi @@ -0,0 +1,38 @@ +x86 BZHI semantic bug +Description of problem +The result of instruction BZHI is different from the CPU. The value of destination register and SF of EFLAGS are different. + +Steps to reproduce + +Compile this code + + +void main() { + asm("mov rax, 0xb1aa9da2fe33fe3"); + asm("mov rbx, 0x80000000ffffffff"); + asm("mov rcx, 0xf3fce8829b99a5c6"); + asm("bzhi rax, rbx, rcx"); +} + + + +Execute and compare the result with the CPU. + +CPU + +RAX = 0x80000000ffffffff +SF = 1 + + +QEMU + +RAX = 0xffffffff +SF = 0 + + + + + + +Additional information +This bug is discovered by research conducted by KAIST SoftSec. |