about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author wannacu <wannacu2049@gmail.com> 2023-08-25 10:50:16 +0800
committer wannacu <wannacu2049@gmail.com> 2023-08-25 16:43:11 +0800
commit 7b1fa28b85092ff36017f55be53c03d2d38ec516 (patch)
tree 16d2351fd72f6748861fe1283a23b76b712ce960 /src
parent 30475f0edadb7e7d3af5d9ffb66b7b9b2c7299f1 (diff)
download box64-7b1fa28b85092ff36017f55be53c03d2d38ec516.tar.gz
box64-7b1fa28b85092ff36017f55be53c03d2d38ec516.zip
[ARM64_DYNAREC] Added (66) 0F 2A/2C/2D opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 1
-rw-r--r-- src/dynarec/arm64/arm64_printer.c 8
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 97
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 87
4 files changed, 175 insertions, 18 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 02ba2097..e27011db 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1360,6 +1360,7 @@
 #define VFRINTRDQ(Vd,Vn, mode)      EMIT(FRINT_vector(1, 0, (mode)&1, 1, ((mode)>>1)&1, Vn, Vd))
 // round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero
 #define VFRINTRSQ(Vd,Vn, mode)      EMIT(FRINT_vector(1, 0, (mode)&1, 0, ((mode)>>1)&1, Vn, Vd))
+#define VFRINTRS(Vd, Vn, mode)      EMIT(FRINT_vector(0, 0, (mode)&1, 0, ((mode)>>1)&1, Vn, Vd))
 
 #define FRINTI_scalar(type, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | 0b111<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
 #define FRINTIS(Sd, Sn)             EMIT(FRINTI_scalar(0b00, Sn, Sd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 2806497b..705314df 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1302,6 +1302,14 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "FCVT%sS %s, %c%d", roundings[a.c], sf?Xt[Rd]:Wt[Rd], s, Rn);

         return buff;

     }

+    if(isMask(opcode, "0QU01110of100001101o10nnnnnddddd", &a)) {

+        const char* Y[] = {"2S", "4S", "??", "2D"};

+        const char* Z[] = {"S", "S", "??", "D"};

+        const char* Vd = Y[(sf<<1) | a.Q];

+        const char* roundings[] = {"N", "M", "P", "Z"};

+        snprintf(buff, sizeof(buff), "VFCVT%s%s%s%s V%d.%s, V%d.%s", roundings[option], a.U?"U":"S", a.Q?"Q":"", Z[(sf<<1)|a.Q], Rd, Vd, Rn, Vd);

+        return buff;

+    }

 

     // FMOV

     if(isMask(opcode, "00011110pp100000010000nnnnnddddd", &a)) {

diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index a3ef9ef7..d1103c4e 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -302,7 +302,17 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SMWRITE2();

             }

             break;

-

+        case 0x2A:

+            INST_NAME("CVTPI2PS Gx,Em");

+            nextop = F8;

+            GETGX(v0, 1);

+            GETEM(q1, 0);

+            d0 = fpu_get_scratch(dyn);

+            u8 = sse_setround(dyn, ninst, x1, x2, x3);

+            SCVTFS(d0, q1);

+            x87_restoreround(dyn, ninst, u8);

+            VMOVeD(v0, 0, d0, 0);

+            break;

         case 0x2B:

             INST_NAME("MOVNTPS Ex,Gx");

             nextop = F8;

@@ -317,7 +327,69 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VST128(v0, ed, fixedaddress);

             }

             break;

-

+        case 0x2C:

+            INST_NAME("CVTTPS2PI Gm,Ex");

+            nextop = F8;

+            GETGM(q0);

+            GETEX(v1, 0, 0);

+            if (box64_dynarec_fastround) {

+                VFCVTZSS(q0, v1);

+            } else {

+                MRS_fpsr(x5);

+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                MSR_fpsr(x5);

+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000

+                d0 = fpu_get_scratch(dyn);

+                for (int i=0; i<2; ++i) {

+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                    if (i) {

+                        VMOVeS(d0, 0, v1, i);

+                        FRINTZS(d0, d0);

+                    } else {

+                        FRINTZS(d0, v1);

+                    }

+                    FCVTZSwS(x1, d0);

+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                    TBZ(x5, FPSR_IOC, 4+4);

+                    MOVw_REG(x1, x2);

+                    VMOVQSfrom(q0, i, x1);

+                }

+            }

+            break;

+        case 0x2D:

+            INST_NAME("CVTPS2PI Gm, Ex");

+            nextop = F8;

+            GETGM(q0);

+            GETEX(v1, 0, 0);

+            if (box64_dynarec_fastround) {

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                VFRINTIS(q0, v1);

+                x87_restoreround(dyn, ninst, u8);

+                VFCVTZSS(q0, q0);

+            } else {

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                MRS_fpsr(x5);

+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                MSR_fpsr(x5);

+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000

+                d0 = fpu_get_scratch(dyn);

+                for (int i=0; i<2; ++i) {

+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                    if (i) {

+                        VMOVeS(d0, 0, v1, i);

+                        FRINTIS(d0, d0);

+                    } else {

+                        FRINTIS(d0, v1);

+                    }

+                    FCVTZSwS(x1, d0);

+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                    TBZ(x5, FPSR_IOC, 4+4);

+                    MOVw_REG(x1, x2);

+                    VMOVQSfrom(q0, i, x1);

+                }

+                x87_restoreround(dyn, ninst, u8);

+            }

+            break;

         case 0x2E:

             // no special check...

         case 0x2F:

@@ -2039,21 +2111,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEM(q1, 0);

             d0 = fpu_get_scratch(dyn);

             d1 = fpu_get_scratch(dyn);

-            USHR_64(d1, q1, 7);

-            MOV32w(x1, 1);

-            VDUPB(d0, x1);

-            VAND(d1, d1, d0);

-            MOV32w(x1, 0xff);

-            VDUPB(d0, x1);

-            VMUL_8(d0, d0, d1); // d0 = byte selection bitmask

-            VAND(d1, q0, d0);   // d1 = masked Gm

-            LDx(x1, xRDI, 0);   // x1 = [rdi]

-            VMOVQDto(x2, d0, 0);

-            MVNx_REG(x2, x2);

-            ANDx_REG(x1, x1, x2); // x1 = clear selected bytes

-            VMOVQDto(x2, d1, 0);

-            ORRx_REG(x1, x1, x2);

-            STx(x1, xRDI, 0);

+            VSSHR_8(d1, q1, 7); // d1 = byte selection mask

+            VLDR64_U12(d0, xRDI, 0);

+            VBIC(d0, d0, d1);   // d0 = clear masked byte

+            VAND(d1, q0, d1);   // d1 = masked Gm

+            VORR(d0, d0, d1);

+            VSTR64_U12(d0, xRDI, 0);

             break;

         case 0xF8:

             INST_NAME("PSUBB Gm, Em");

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 9f30df50..30cfc937 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -201,7 +201,92 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 SMWRITE2();

             }

             break;

-

+        case 0x2A:

+            INST_NAME("CVTPI2PD Gx,Em");

+            nextop = F8;

+            GETGX(v0, 1);

+            GETEM(q1, 0);

+            SXTL_32(v0, q1);

+            SCVTQFD(v0, v0);

+            break;

+        case 0x2B:

+            INST_NAME("MOVNTPD Ex,Gx");

+            nextop = F8;

+            GETGX(v0, 0);

+            if(MODREG) {

+                ed = (nextop&7)+(rex.b<<3);

+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);

+                VMOVQ(v1, v0);

+            } else {

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);

+                VST128(v0, ed, fixedaddress);

+            }

+            break;

+        case 0x2C:

+            INST_NAME("CVTTPD2PI Gm,Ex");

+            nextop = F8;

+            GETGM(q0);

+            GETEX(v1, 0, 0);

+            if(box64_dynarec_fastround) {

+                VFCVTZSQD(q0, v1);

+                SQXTN_32(q0, q0);

+            } else {

+                MRS_fpsr(x5);

+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                MSR_fpsr(x5);

+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000

+                d0 = fpu_get_scratch(dyn);

+                for (int i=0; i<2; ++i) {

+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                    if (i) {

+                        VMOVeD(d0, 0, v1, i);

+                        FRINTZD(d0, d0);

+                    } else {

+                        FRINTZD(d0, v1);

+                    }

+                    FCVTZSwD(x1, d0);

+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                    TBZ(x5, FPSR_IOC, 4+4);

+                    MOVw_REG(x1, x2);

+                    VMOVQSfrom(q0, i, x1);

+                }

+            }

+            break;

+        case 0x2D:

+            INST_NAME("CVTPD2PI Gm,Ex");

+            nextop = F8;

+            GETGM(q0);

+            GETEX(v1, 0, 0);

+            if(box64_dynarec_fastround) {

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                VFRINTIDQ(q0, v1);

+                FCVTXN(q0, q0);

+                x87_restoreround(dyn, ninst, u8);

+                VFCVTZSS(q0, q0);

+            } else {

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                MRS_fpsr(x5);

+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                MSR_fpsr(x5);

+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000

+                d0 = fpu_get_scratch(dyn);

+                for (int i=0; i<2; ++i) {

+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                    if (i) {

+                        VMOVeD(d0, 0, v1, i);

+                        FRINTID(d0, d0);

+                    } else {

+                        FRINTID(d0, v1);

+                    }

+                    FCVTZSwD(x1, d0);

+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                    TBZ(x5, FPSR_IOC, 4+4);

+                    MOVw_REG(x1, x2);

+                    VMOVQSfrom(q0, i, x1);

+                }

+                x87_restoreround(dyn, ninst, u8);

+            }

+            break;

         case 0x2E:

             // no special check...

         case 0x2F: