author     Yang Liu <liuyang22@iscas.ac.cn>   2024-11-13 21:22:25 +0800
committer  GitHub <noreply@github.com>        2024-11-13 14:22:25 +0100
commit     506cb980b10b8850c9a2aaac1e4d97104617ba15 (patch)
tree       a8835f6427387ca4548138192e1311f0672725bb /src
parent     937e2cf853255dd636388134c882c6277ce74552 (diff)
download   box64-506cb980b10b8850c9a2aaac1e4d97104617ba15.tar.gz
           box64-506cb980b10b8850c9a2aaac1e4d97104617ba15.zip
[RV64_DYNAREC] Prefer AMO* instructions over LR/SC when possible (#2028)
* [RV64_DYNAREC] Prefer AMO* instructions over LR/SC when possible

* fixes
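
The change in a nutshell: x86 LOCK-prefixed read-modify-write instructions were previously translated into LR/SC retry loops. The RISC-V A extension's AMO instructions perform the same read-modify-write in a single instruction and return the previous memory value in rd, so the dynarec now emits AMO* whenever the access is naturally aligned and keeps LR/SC only for the unaligned fallback paths. The old value is still needed afterwards because the dynarec recomputes EFLAGS from it (the emit_add32/emit_or32/... calls kept in the diff below). As a rough illustration of the two emission strategies for an aligned 32-bit locked add, a standalone sketch (plain C with RV64 inline assembly; this is not box64 code and the function names are invented for the sketch):

// Illustrative comparison only (standalone C, RV64 inline assembly); this is
// not box64 code, and the function names are invented for the sketch.
#include <stdint.h>

// Old strategy: LR/SC retry loop, as the removed MARKLOCK/LRxw/ADDxw/SCxw/
// BNEZ_MARKLOCK sequences emitted for an aligned "lock add dword [mem], reg".
static inline uint32_t atomic_add32_lrsc(uint32_t* p, uint32_t v)
{
    uint32_t old, tmp;
    __asm__ __volatile__(
        "1: lr.w.aqrl %0, (%2)\n"
        "   add       %1, %0, %3\n"
        "   sc.w.aqrl %1, %1, (%2)\n"
        "   bnez      %1, 1b\n"
        : "=&r"(old), "=&r"(tmp)
        : "r"(p), "r"(v)
        : "memory");
    return old;
}

// New strategy: one AMO does the read-modify-write atomically and returns the
// previous value, which can then feed the flags computation.
static inline uint32_t atomic_add32_amo(uint32_t* p, uint32_t v)
{
    uint32_t old;
    __asm__ __volatile__(
        "amoadd.w.aqrl %0, %2, (%1)"
        : "=r"(old)
        : "r"(p), "r"(v)
        : "memory");
    return old;
}
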
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_2.c  |  22
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_66.c    |   2
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_f0.c    | 160
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h       |  26
4 files changed, 87 insertions, 123 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 3390be54..c31d760a 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -270,7 +270,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             break;
         case 0x87:
-            INST_NAME("(LOCK)XCHG Ed, Gd");
+            INST_NAME("(LOCK) XCHG Ed, Gd");
             nextop = F8;
             if(MODREG) {
                 GETGD;
@@ -282,19 +282,22 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 GETGD;
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
                 SMDMB();
-                ANDI(x3, ed, (1<<(2+rex.w))-1);
+                ANDI(x3, ed, (1 << (2 + rex.w)) - 1);
                 BNE_MARK(x3, xZR);
-                MARKLOCK;
-                LRxw(x1, ed, 1, 0);
-                SCxw(x3, gd, ed, 0, 1);
-                BNE_MARKLOCK(x3, xZR);
-                B_MARK2_nocond;
+                AMOSWAPxw(gd, gd, ed, 1, 1);
+                if (!rex.w) ZEROUP(gd);
+                B_NEXT_nocond;
                 MARK;
+                // Unaligned
+                ANDI(x5, ed, -(1 << (rex.w + 2)));
+                MARKLOCK;
                 LDxw(x1, ed, 0);
+                LRxw(x3, x5, 1, 1);
+                SCxw(x4, x3, x5, 1, 1);
+                BNEZ_MARKLOCK(x4);
                 SDxw(gd, ed, 0);
-                MARK2;
-                SMDMB();
                 MVxw(gd, x1);
+                SMDMB();
             }
             break;
         case 0x88:
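
Two details of the XCHG path above are worth spelling out. The ANDI/BNE_MARK pair tests natural alignment ((1 << (2 + rex.w)) - 1 is 3 for 32-bit and 7 for 64-bit operands), since AMO* requires an aligned address; unaligned accesses fall back to an LR/SC guard on the containing aligned word. And because AMO*.W sign-extends the loaded 32-bit value on RV64 while x86 zero-extends 32-bit results, the aligned path adds ZEROUP(gd) when rex.w is clear. A minimal sketch of the alignment predicate (plain C; the helper name is invented for illustration):

// Mirrors ANDI(x3, ed, (1 << (2 + rex.w)) - 1) followed by BNE_MARK:
// AMO* may only be used when the operand is naturally aligned.
#include <stdbool.h>
#include <stdint.h>

static bool amo_usable(uintptr_t addr, bool rexw)
{
    unsigned width = rexw ? 8 : 4;        // operand size in bytes
    return (addr & (width - 1)) == 0;     // low bits clear => aligned
}
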
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index a4df72f7..f5559843 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -561,7 +561,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_test16(dyn, ninst, x1, x2, x6, x4, x5);
             break;
         case 0x87:
-            INST_NAME("(LOCK)XCHG Ew, Gw");
+            INST_NAME("(LOCK) XCHG Ew, Gw");
             nextop = F8;
             if(MODREG) {
                 GETGD;
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index 3e504ca6..fc8c46ad 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -64,11 +64,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LRxw(x1, wback, 1, 1);
-                ADDxw(x4, x1, gd);
-                SCxw(x3, x4, wback, 1, 1);
-                BNEZ_MARKLOCK(x3);
+                AMOADDxw(x1, gd, wback, 1, 1);
                 IFX(X_ALL|X_PEND) {
                     emit_add32(dyn, ninst, rex, x1, gd, x3, x4, x5);
                 }
@@ -86,11 +82,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 emit_or32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LRxw(x1, wback, 1, 1);
-                OR(x4, x1, gd);
-                SCxw(x3, x4, wback, 1, 1);
-                BNEZ_MARKLOCK(x3);
+                AMOORxw(x1, gd, wback, 1, 1);
                 IFX(X_ALL|X_PEND)
                     emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
             }
@@ -264,11 +256,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5);
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                                MARKLOCK;
-                                LRxw(x1, wback, 1, 1);
-                                ADDxw(x4, x1, gd);
-                                SCxw(x3, x4, wback, 1, 1);
-                                BNEZ_MARKLOCK(x3);
+                                AMOADDxw(x1, gd, wback, 1, 1);
                                 IFX(X_ALL|X_PEND) {
                                     MVxw(x2, x1);
                                     emit_add32(dyn, ninst, rex, x2, gd, x3, x4, x5);
@@ -463,11 +451,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 emit_and32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LRxw(x1, wback, 1, 1);
-                AND(x4, x1, gd);
-                SCxw(x3, x4, wback, 1, 1);
-                BNEZ_MARKLOCK(x3);
+                AMOANDxw(x1, gd, wback, 1, 1);
                 IFX(X_ALL|X_PEND)
                     emit_and32(dyn, ninst, rex, x1, gd, x3, x4);
             }
@@ -484,11 +468,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 emit_sub32(dyn, ninst, rex, ed, gd, x3, x4, x5);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LRxw(x1, wback, 1, 1);
-                SUB(x4, x1, gd);
-                SCxw(x3, x4, wback, 1, 1);
-                BNEZ_MARKLOCK(x3);
+                SUBxw(x4, xZR, gd);
+                AMOADDxw(x1, x4, wback, 1, 1);
                 IFX(X_ALL|X_PEND)
                     emit_sub32(dyn, ninst, rex, x1, gd, x3, x4, x5);
             }
@@ -517,11 +498,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         ANDI(x3, wback, ~3); // aligned addr
                         ADDI(x1, xZR, u8);
                         SLL(x1, x1, x2);     // Ib << offset
-                        MARKLOCK;
-                        LR_W(x4, x3, 1, 1);
-                        OR(x6, x4, x1);
-                        SC_W(x6, x6, x3, 1, 1);
-                        BNEZ_MARKLOCK(x6);
+                        AMOORxw(x4, x1, x3, 1, 1);
                         IFX(X_ALL|X_PEND) {
                             SRL(x1, x4, x2);
                             ANDI(x1, x1, 0xFF);
@@ -553,32 +530,24 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     } else {
                         SMDMB();
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
-                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        if (i64 < -2048 || i64 >= 2048)
-                            MOV64xw(x9, i64);
+                        if (opcode == 0x81)
+                            i64 = F32S;
+                        else
+                            i64 = F8S;
+                        MOV64xw(x9, i64);
                         ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                         BNEZ_MARK3(x1);
                         // Aligned
-                        MARKLOCK;
-                        LRxw(x1, wback, 1, 1);
-                        if (i64 >= -2048 && i64 < 2048)
-                            ADDIxw(x4, x1, i64);
-                        else
-                            ADDxw(x4, x1, x9);
-                        SCxw(x3, x4, wback, 1, 1);
-                        BNEZ_MARKLOCK(x3);
+                        AMOADDxw(x1, x9, wback, 1, 1);
                         B_MARK_nocond;
                         MARK3;
                         // Unaligned
                         ANDI(x5, wback, -(1 << (rex.w + 2)));
                         MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
-                        LDxw(x6, wback, 0);
-                        LRxw(x1, x5, 1, 1);
-                        if (i64 >= -2048 && i64 < 2048)
-                            ADDIxw(x4, x6, i64);
-                        else
-                            ADDxw(x4, x6, x9);
-                        SCxw(x3, x1, x5, 1, 1);
+                        LDxw(x1, wback, 0);
+                        LRxw(x6, x5, 1, 1);
+                        ADDxw(x4, x1, x9);
+                        SCxw(x3, x6, x5, 1, 1);
                         BNEZ_MARK2(x3);
                         SDxw(x4, wback, 0);
                         MARK;
@@ -600,19 +569,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
-                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        if (i64 < -2048 || i64 >= 2048)
-                            MOV64xw(x9, i64);
-                        MARKLOCK;
-                        LRxw(x1, wback, 1, 1);
-                        if (i64 >= -2048 && i64 < 2048) {
-                            ORI(x4, x1, i64);
-                        } else {
-                            OR(x4, x1, x9);
-                        }
-                        if (!rex.w) ZEROUP(x4);
-                        SCxw(x3, x4, wback, 1, 1);
-                        BNEZ_MARKLOCK(x3);
+                        if (opcode == 0x81)
+                            i64 = F32S;
+                        else
+                            i64 = F8S;
+                        MOV64xw(x4, i64);
+                        AMOORxw(x1, x4, wback, 1, 1);
                         IFX(X_ALL|X_PEND)
                             emit_or32c(dyn, ninst, rex, x1, i64, x3, x4);
                     }
@@ -630,19 +592,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         emit_and32c(dyn, ninst, rex, ed, i64, x3, x4);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
-                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        if (i64 < -2048 || i64 >= 2048)
-                            MOV64xw(x9, i64);
-                        MARKLOCK;
-                        LRxw(x1, wback, 1, 1);
-                        if (i64 >= -2048 && i64 < 2048) {
-                            ANDI(x4, x1, i64);
-                        } else {
-                            AND(x4, x1, x9);
-                        }
-                        if (!rex.w) ZEROUP(x4);
-                        SCxw(x3, x4, wback, 1, 1);
-                        BNEZ_MARKLOCK(x3);
+                        if (opcode == 0x81)
+                            i64 = F32S;
+                        else
+                            i64 = F8S;
+                        MOV64xw(x9, i64);
+                        AMOANDxw(x1, x9, wback, 1, 1);
                         IFX(X_ALL|X_PEND)
                             emit_and32c(dyn, ninst, rex, x1, i64, x3, x4);
                     }
@@ -660,34 +615,25 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
-                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        if (i64 <= -2048 || i64 > 2048)
-                            MOV64xw(x9, i64);
+                        if (opcode == 0x81)
+                            i64 = F32S;
+                        else
+                            i64 = F8S;
+                        MOV64xw(x9, i64);
                         ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                         BNEZ_MARK3(x1);
                         // Aligned
-                        MARKLOCK;
-                        LRxw(x1, wback, 1, 1);
-                        if (i64 > -2048 && i64 <= 2048) {
-                            ADDIxw(x4, x1, -i64);
-                        } else {
-                            SUBxw(x4, x1, x9);
-                        }
-                        SCxw(x3, x4, wback, 1, 1);
-                        BNEZ_MARKLOCK(x3);
+                        SUB(x4, xZR, x9);
+                        AMOADDxw(x1, x4, wback, 1, 1);
                         B_MARK_nocond;
                         MARK3;
                         // Unaligned
                         ANDI(x5, wback, -(1 << (rex.w + 2)));
                         MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
-                        LDxw(x6, wback, 0);
-                        LRxw(x1, x5, 1, 1);
-                        if (i64 > -2048 && i64 <= 2048) {
-                            ADDIxw(x4, x6, -i64);
-                        } else {
-                            SUBxw(x4, x6, x9);
-                        }
-                        SCxw(x3, x1, x5, 1, 1);
+                        LDxw(x1, wback, 0);
+                        LRxw(x6, x5, 1, 1);
+                        SUBxw(x4, x1, x9);
+                        SCxw(x3, x6, x5, 1, 1);
                         BNEZ_MARK2(x3);
                         SDxw(x4, wback, 0);
                         MARK;
@@ -716,17 +662,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             i64 = F32S;
                         else
                             i64 = F8S;
-                        if (i64 < -2048 || i64 >= 2048)
-                            MOV64xw(x9, i64);
-                        MARKLOCK;
-                        LRxw(x1, wback, 1, 1);
-                        if (i64 >= -2048 && i64 < 2048) {
-                            XORI(x4, x1, i64);
-                        } else {
-                            XOR(x4, x1, x9);
-                        }
-                        SCxw(x3, x4, wback, 1, 1);
-                        BNEZ_MARKLOCK(x3);
+                        MOV64xw(x9, i64);
+                        AMOXORxw(x1, x9, wback, 1, 1);
                         IFX(X_ALL | X_PEND)
                             emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4);
                     }
@@ -751,21 +688,18 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                 BNEZ_MARK3(x1);
                 // Aligned
-                MARKLOCK;
-                LRxw(x1, wback, 1, 1);
-                SCxw(x4, gd, wback, 1, 1);
-                BNEZ_MARKLOCK(x4);
-                B_MARK_nocond;
+                AMOSWAPxw(gd, gd, wback, 1, 1);
+                if (!rex.w) ZEROUP(gd);
+                B_NEXT_nocond;
                 MARK3;
                 // Unaligned
                 ANDI(x5, wback, -(1 << (rex.w + 2)));
-                MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
+                MARKLOCK;
                 LDxw(x1, wback, 0);
                 LRxw(x3, x5, 1, 1);
                 SCxw(x4, x3, x5, 1, 1);
-                BNEZ_MARK2(x4);
+                BNEZ_MARKLOCK(x4);
                 SDxw(gd, wback, 0);
-                MARK;
                 MVxw(gd, x1);
                 SMDMB();
             }
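
One non-obvious trick in the hunks above: RISC-V has no AMOSUB, so LOCK SUB is emitted as a negation (SUBxw(x4, xZR, gd) or SUB(x4, xZR, x9)) followed by AMOADDxw. Likewise, AMOs take no immediate operand, which is why the immediate forms now always materialize i64 with MOV64xw instead of special-casing the ADDI/ORI/ANDI/XORI range. A standalone sketch of the subtract-via-add idea (illustrative C with RV64 inline assembly, not box64 code):

// Standalone illustration (not box64 code): a locked subtract becomes
// "negate, then AMOADD of the negated value".
#include <stdint.h>

static inline uint64_t atomic_sub64_amo(uint64_t* p, uint64_t v)
{
    uint64_t old;
    uint64_t neg = 0 - v;                   // SUBxw(x4, xZR, gd) in the diff
    __asm__ __volatile__(
        "amoadd.d.aqrl %0, %2, (%1)"        // AMOADDxw(x1, x4, wback, 1, 1)
        : "=r"(old)
        : "r"(p), "r"(neg)
        : "memory");
    return old;                             // old value feeds the flags emulation
}
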
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index f56b68e7..4e199e00 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -561,6 +561,14 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
 
 #define AMOSWAP_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOADD_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOXOR_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOAND_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b01100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOOR_W(rd, rs2, rs1, aq, rl)   EMIT(R_type(AQ_RL(0b01000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOMIN_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b10000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOMAX_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b10100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOMINU_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+#define AMOMAXU_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
 
 // RV64A
 #define LR_D(rd, rs1, aq, rl)      EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111))
@@ -570,6 +578,24 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
 
 #define AMOSWAP_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOADD_D(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOXOR_D(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOAND_D(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b01100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOOR_D(rd, rs2, rs1, aq, rl)   EMIT(R_type(AQ_RL(0b01000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOMIN_D(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b10000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOMAX_D(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b10100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOMINU_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+#define AMOMAXU_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+
+#define AMOSWAPxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOADDxw(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOXORxw(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOANDxw(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b01100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOORxw(rd, rs2, rs1, aq, rl)   EMIT(R_type(AQ_RL(0b01000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOMINxw(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b10000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOMAXxw(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b10100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOMINUxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
+#define AMOMAXUxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111))
 
 // RV32F
 // Read round mode
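
For reference, the new AMO* macros above all share one encoding shape: an R-type instruction with opcode 0b0101111, funct3 selecting the width (0b010 for .W, 0b011 for .D, which is what the "| rex.w" in the xw variants toggles), and funct7 packing the five-bit operation code together with the aq/rl ordering bits. A small self-contained encoder in the same spirit (illustrative only; amo_encode and the register numbers below are invented for this sketch, not box64 helpers):

// Stand-alone sketch of the AMO encoding the macros above emit.
#include <stdint.h>
#include <stdio.h>

static uint32_t amo_encode(uint32_t funct5, int aq, int rl,
                           uint32_t rs2, uint32_t rs1,
                           uint32_t funct3, uint32_t rd)
{
    uint32_t funct7 = (funct5 << 2) | ((aq & 1) << 1) | (rl & 1); // like AQ_RL()
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) |           // R-type layout
           (funct3 << 12) | (rd << 7) | 0x2F;                     // opcode 0b0101111
}

int main(void)
{
    // amoadd.w.aqrl a0, a1, (a2): funct5=0b00000, funct3=0b010,
    // rd=x10 (a0), rs2=x11 (a1), rs1=x12 (a2)
    printf("%08x\n", amo_encode(0x00, 1, 1, 11, 12, 0x2, 10));
    return 0;
}
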