author     ptitSeb <sebastien.chev@gmail.com>  2024-03-07 21:25:11 +0100
committer  ptitSeb <sebastien.chev@gmail.com>  2024-03-07 21:25:11 +0100
commit     8fef5f2c69d0873d1a6371796587b5e892226cde (patch)
tree       3b9a231387b0b78c46942a47c526761031af60cd /src
parent     9a4a62ff2ddb9ec6286780f046ea785b2bb54306 (diff)
[ARM64_DYNAREC] Use convert_bitmask in logic emitters and in MOV32w and MOV64x
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/arm64_emitter.h             36
-rw-r--r--  src/dynarec/arm64/arm64_immenc.c               1
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_emit_logic.c  70
3 files changed, 81 insertions, 26 deletions
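
This commit teaches the emitters to use the AArch64 "bitmask immediate" form of the logical instructions (AND/ORR/EOR/TST) whenever a constant qualifies, instead of first materializing the constant in a scratch register. A bitmask immediate is a 2-, 4-, 8-, 16-, 32- or 64-bit element made of a rotated contiguous run of ones, replicated across the whole register. The sketch below is a standalone illustration of which constants qualify; it is not box64's convert_bitmask (the real encoder lives in arm64_immenc.c):

    #include <stdint.h>

    /* Illustrative check: returns 1 if v is expressible as an AArch64 bitmask
     * immediate. All-zeros and all-ones are architecturally excluded. */
    static int is_bitmask_imm(uint64_t v)
    {
        if (v == 0 || v == ~0ULL) return 0;
        for (int size = 2; size <= 64; size *= 2) {
            uint64_t emask = (size == 64) ? ~0ULL : (1ULL << size) - 1;
            uint64_t elem = v & emask;
            int repeats = 1;
            for (int i = size; i < 64; i += size)   /* element must tile the register */
                if (((v >> i) & emask) != elem) { repeats = 0; break; }
            if (!repeats) continue;
            for (int r = 0; r < size; r++) {        /* element must be a rotated run of ones */
                uint64_t rot = ((elem >> r) | (elem << ((size - r) % 64))) & emask;
                if ((rot & (rot + 1)) == 0) return 1;  /* rot == 0b0...01...1 */
            }
            return 0;  /* v tiles at this size but the element is not a run of ones */
        }
        return 0;
    }
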
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index a04a7673..46de26cf 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -88,6 +88,11 @@
 #define cLE 0b1101
 #define c__ 0b1110
 
+int convert_bitmask(uint64_t bitmask);
+#define convert_bitmask_w(A)    convert_bitmask(((uint64_t)(A) << 32) + (uint32_t)(A))
+#define convert_bitmask_x(A)    convert_bitmask((uint64_t)(A))
+#define convert_bitmask_xw(A)   convert_bitmask(rex.w?((uint64_t)(A)):(((uint64_t)(A) << 32) + (uint32_t)(A)))
+
 #define invCond(cond)   ((cond)^0b0001)
 
 // MOVZ
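
The _w wrapper replicates its 32-bit argument into both halves before calling the 64-bit encoder: a constant is valid for the w-register form exactly when its self-replication encodes with an element size of 32 bits or less (N == 0). A quick illustration with an arbitrary value:

    uint32_t imm = 0x00FF00FF;
    uint64_t pattern = ((uint64_t)imm << 32) + (uint32_t)imm;  /* 0x00FF00FF00FF00FF */
    /* 16-bit element 0x00FF: run of 8 ones, rotation 0, N == 0,
     * so convert_bitmask_w(imm) returns a nonzero packed encoding */
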
@@ -113,18 +118,24 @@
 
 // This macro will give a -Wsign-compare warning, probably bug #38341
 #define MOV32w(Rd, imm32) \
-    if(~((uint32_t)(imm32))<0xffffu) {                                      \
-        MOVNw(Rd, (~(uint32_t)(imm32))&0xffff);                             \
-    } else {                                                                \
-        MOVZw(Rd, (imm32)&0xffff);                                          \
-        if((imm32)&0xffff0000) {MOVKw_LSL(Rd, ((imm32)>>16)&0xffff, 16);}   \
+    if(~((uint32_t)(imm32))<0xffffu) {                                      \
+        MOVNw(Rd, (~(uint32_t)(imm32))&0xffff);                             \
+    } else if((uint32_t)(imm32)>0xffff && convert_bitmask_w(imm32)) {       \
+        int mask = convert_bitmask_w(imm32);                                \
+        ORRw_mask(Rd, xZR, mask&0x3F, (mask>>6)&0x3F);                      \
+    } else {                                                                \
+        MOVZw(Rd, (imm32)&0xffff);                                          \
+        if((imm32)&0xffff0000) {MOVKw_LSL(Rd, ((imm32)>>16)&0xffff, 16);}   \
     }
 #define MOV64x(Rd, imm64) \
-    if(~((uint64_t)(imm64))<0xffff) {                                                                       \
-        MOVNx(Rd, (~(uint64_t)(imm64))&0xffff);                                                             \
-    } else {                                                                                                \
-        MOVZx(Rd, ((uint64_t)(imm64))&0xffff);                                                              \
-        if(((uint64_t)(imm64))&0xffff0000) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>16)&0xffff, 16);}           \
+    if(~((uint64_t)(imm64))<0xffff) {                                                                       \
+        MOVNx(Rd, (~(uint64_t)(imm64))&0xffff);                                                             \
+    } else if((uint64_t)(imm64)>0xffff && convert_bitmask_x(imm64)) {                                       \
+        int mask = convert_bitmask_x(imm64);                                                                \
+        ORRx_mask(Rd, xZR, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);                                        \
+    } else {                                                                                                \
+        MOVZx(Rd, ((uint64_t)(imm64))&0xffff);                                                              \
+        if(((uint64_t)(imm64))&0xffff0000) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>16)&0xffff, 16);}           \
         if(((uint64_t)(imm64))&0xffff00000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>32)&0xffff, 32);}     \
         if(((uint64_t)(imm64))&0xffff000000000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>48)&0xffff, 48);} \
     }
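
The payoff in MOV32w/MOV64x: a constant such as 0x5555555555555555 used to need a four-instruction MOVZ/MOVK chain, but it encodes as a bitmask immediate (2-bit element 0b01), so the new branch emits a single ORR against the zero register. A sketch of how the packed result is unpacked, matching the field extraction in the macros above:

    int mask = convert_bitmask_x(0x5555555555555555ULL);  /* nonzero: encodable */
    int N    = (mask >> 12) & 1;    /* set only for a 64-bit element */
    int imms = (mask >> 6) & 0x3F;  /* ARM's composite element-size + run-length field */
    int immr = mask & 0x3F;         /* right-rotation amount */
    /* ORRx_mask(Rd, xZR, N, immr, imms)  =>  orr Rd, xzr, #0x5555555555555555 */
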
@@ -513,14 +524,19 @@
 // logic to get the mask is... convoluted... a list of possible values is here: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a
 #define ANDx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b00, N, immr, imms, Rn, Rd))
 #define ANDw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b00, 0, immr, imms, Rn, Rd))
+#define ANDxw_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(rex.w, 0b00, rex.w?(N):0, immr, imms, Rn, Rd))
 #define ANDSx_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd))
 #define ANDSw_mask(Rd, Rn, immr, imms)      EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd))
+#define ANDSxw_mask(Rd, Rn, N, immr, imms)  EMIT(LOGIC_gen(rex.w, 0b11, rex.w?(N):0, immr, imms, Rn, Rd))
 #define ORRx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd))
 #define ORRw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd))
+#define ORRxw_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(rex.w, 0b01, rex.w?(N):0, immr, imms, Rn, Rd))
 #define EORx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b10, N, immr, imms, Rn, Rd))
 #define EORw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b10, 0, immr, imms, Rn, Rd))
+#define EORxw_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(rex.w, 0b10, rex.w?(N):0, immr, imms, Rn, Rd))
 #define TSTx_mask(Rn, N, immr, imms)        ANDSx_mask(xZR, Rn, N, immr, imms)
 #define TSTw_mask(Rn, immr, imms)           ANDSw_mask(wZR, Rn, immr, imms)
+#define TSTxw_mask(Rn, N, immr, imms)       ANDSxw_mask(xZR, Rn, N, immr, imms)
 
 #define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd)    ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
 #define ANDx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
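
The new xw variants select the operand width from rex.w at emission time, and they force N to 0 for the 32-bit case: in the architectural encoding of the logical (immediate) class, sf == 0 with N == 1 is an unallocated pattern. For reference, the layout these macros ultimately produce (per the ARM ARM; LOGIC_gen is assumed to mirror it):

    /* sf(31) opc(30:29) 100100(28:23) N(22) immr(21:16) imms(15:10) Rn(9:5) Rd(4:0) */
    uint32_t immr = 0, imms = 7, Rn = 1, Rd = 1;     /* example: orr w1, w1, #0xff */
    uint32_t insn = (0u << 31)                       /* sf = 0: 32-bit form */
                  | (0b01u << 29)                    /* opc = 01: ORR */
                  | (0b100100u << 23)
                  | (0u << 22)                       /* N must be 0 when sf == 0 */
                  | (immr << 16) | (imms << 10) | (Rn << 5) | Rd;
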
diff --git a/src/dynarec/arm64/arm64_immenc.c b/src/dynarec/arm64/arm64_immenc.c
index 5cedf22e..ea28c9fb 100644
--- a/src/dynarec/arm64/arm64_immenc.c
+++ b/src/dynarec/arm64/arm64_immenc.c
@@ -48,6 +48,7 @@ int convert_bitmask(uint64_t bitmask) {
 	int to = 1;
 	while (pat & (one << to)) ++to;
 	
+	// printf("%016lX/%lu: returning %c%c%02lX%02lX\n", bitmask, size, '2' + (size == 6), (uint64_t)(((0x1E << size) & 0x3F) + (to - 1)), (uint64_t)immr);
 	return 0x2000 + ((size == 6) << 12) + ((((0x1E << size) & 0x3F) + (to - 1)) << 6) + immr;
 }
 
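
convert_bitmask packs its three fields into a single int, with bit 13 (0x2000) always set so a valid encoding is never 0 (0 being the "not encodable" sentinel). The (0x1E << size) & 0x3F term synthesizes ARM's composite imms prefix from size = log2(element size): 0b11110x for 2-bit elements, down to 0b0xxxxx for 32-bit ones, with the N bit taking over for 64-bit elements. A worked example:

    int mask = convert_bitmask(0x00FF00FF00FF00FFULL);  /* 16-bit element, run of 8 */
    /* size = 4 (log2 of 16): prefix (0x1E << 4) & 0x3F = 0b100000
     * imms = 0b100000 + (8 - 1) = 0b100111, immr = 0, N = 0
     * mask = 0x2000 | (0b100111 << 6) | 0 = 0x29C0 */
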
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
index fb3f4545..5381ff69 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
@@ -60,8 +60,13 @@ void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    MOV64xw(s3, c);
-    ORRxw_REG(s1, s1, s3);
+    int mask = convert_bitmask_xw(c);
+    if(mask) {
+        ORRxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
+    } else {
+        MOV64xw(s3, c);
+        ORRxw_REG(s1, s1, s3);
+    }
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
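
This is the shape every rewritten emitter follows: try the single-instruction immediate form first, and fall back to building the constant in s3 only when convert_bitmask_xw returns 0. For a 32-bit OR with c = 0x00010001, say, the old path cost three instructions (movz, movk, orr) plus the scratch register; the new one costs one:

    int mask = convert_bitmask_w(0x00010001);  /* nonzero: 16-bit element, run of 1 */
    /* unpacks to N = 0, immr = 0, imms = 0b100000, so the emitter produces
     *     orr w1, w1, #0x00010001
     * and s3 is untouched on this path */
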
@@ -122,8 +127,13 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    MOV64xw(s3, c);
-    EORxw_REG(s1, s1, s3);
+    int mask = convert_bitmask_xw(c);
+    if(mask) {
+        EORxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
+    } else {
+        MOV64xw(s3, c);
+        EORxw_REG(s1, s1, s3);
+    }
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
@@ -187,11 +197,20 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    MOV64xw(s3, c);
-    IFX(X_ALL) {
-        ANDSxw_REG(s1, s1, s3);
+    int mask = convert_bitmask_xw(c);
+    if(mask) {
+        IFX(X_ALL) {
+            ANDSxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
+        } else {
+            ANDxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F);
+        }
     } else {
-        ANDxw_REG(s1, s1, s3);
+        MOV64xw(s3, c);
+        IFX(X_ALL) {
+            ANDSxw_REG(s1, s1, s3);
+        } else {
+            ANDxw_REG(s1, s1, s3);
+        }
     }
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -239,13 +258,18 @@ void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 // emit OR8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
 void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
 {
-    MOV32w(s3, c&0xff);
     IFX(X_PEND) {
         SET_DF(s4, d_or8);
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    ORRw_REG(s1, s1, s3);
+    int mask = convert_bitmask_w(c);
+    if(mask) {
+        ORRw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F);
+    } else {
+        MOV32w(s3, c&0xff);
+        ORRw_REG(s1, s1, s3);
+    }
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
@@ -285,13 +309,18 @@ void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 // emit XOR8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
 void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
 {
-    MOV32w(s3, c&0xff);
     IFX(X_PEND) {
         SET_DF(s4, d_xor8);
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    EORw_REG(s1, s1, s3);
+    int mask = convert_bitmask_w(c);
+    if(mask) {
+        EORw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F);
+    } else {
+        MOV32w(s3, c&0xff);
+        EORw_REG(s1, s1, s3);
+    }
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
@@ -342,16 +371,25 @@ void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 // emit AND8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
 void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
 {
-    MOV32w(s3, c&0xff);
     IFX(X_PEND) {
         SET_DF(s4, d_and8);
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    IFX(X_ZF) {
-        ANDSw_REG(s1, s1, s3);
+    int mask = convert_bitmask_w(c);
+    if(mask) {
+        IFX(X_ZF) {
+            ANDSw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F);
+        } else {
+            ANDw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F);
+        }
     } else {
-        ANDw_REG(s1, s1, s3);
+        MOV32w(s3, c&0xff);
+        IFX(X_ZF) {
+            ANDSw_REG(s1, s1, s3);
+        } else {
+            ANDw_REG(s1, s1, s3);
+        }
     }
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));