#define _GNU_SOURCE #include #include #include #include #include #include #include #include "x64_signals.h" #include "os.h" #include "debug.h" #include "box64context.h" #include "box64cpu.h" #include "emu/x64emu_private.h" #include "tools/bridge_private.h" #include "x64emu.h" #include "box64stack.h" #include "callback.h" #include "emu/x64run_private.h" #include "emu/x87emu_private.h" #include "x64trace.h" #include "mysignal.h" #include "emit_signals.h" #include "dynarec_native.h" #include "custommem.h" #include "bridge.h" #include "dynarec_native_functions.h" void native_fstp(x64emu_t* emu, void* p) { if(ST0.q!=STld(0).uref) D2LD(&ST0.d, p); else memcpy(p, &STld(0).ld, 10); } void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) { (void)emu; dynarec_log(LOG_INFO, "Debug Register R%lu=0x%lx (%lu)\n", n, reg, reg); } void native_f2xm1(x64emu_t* emu) { ST0.d = expm1(LN2 * ST0.d); } void native_fyl2x(x64emu_t* emu) { ST(1).d = log2(ST0.d)*ST(1).d; } void native_ftan(x64emu_t* emu) { #pragma STDC FENV_ACCESS ON // seems that tan of glib doesn't follow the rounding direction mode ST0.d = tan(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fpatan(x64emu_t* emu) { #pragma STDC FENV_ACCESS ON ST1.d = atan2(ST1.d, ST0.d); } void native_fxtract(x64emu_t* emu) { int tmp32s; if(isnan(ST1.d)) { ST0.d = ST1.d; } else if(isinf(ST1.d)) { ST0.d = ST1.d; ST1.d = INFINITY; } else if(ST1.d==0.0) { ST0.d = ST1.d; ST1.d = -INFINITY; } else { // LD80bits doesn't have implicit "1" bit, so need to adjust for that ST0.d = frexp(ST1.d, &tmp32s)*2; ST1.d = tmp32s-1; } } void native_fprem(x64emu_t* emu) { int64_t ll = (int64_t)trunc(ST0.d / ST1.d); ST0.d = ST0.d - (ST1.d * ll); emu->sw.f.F87_C2 = 0; emu->sw.f.F87_C1 = (ll & 1) ? 1 : 0; emu->sw.f.F87_C3 = (ll & 2) ? 1 : 0; emu->sw.f.F87_C0 = (ll & 4) ? 1 : 0; } void native_fyl2xp1(x64emu_t* emu) { ST(1).d = log1p(ST0.d)*ST(1).d/LN2; } void native_fsincos(x64emu_t* emu) { #pragma STDC FENV_ACCESS ON // seems that sincos of glib doesn't follow the rounding direction mode sincos(ST1.d, &ST1.d, &ST0.d); emu->sw.f.F87_C2 = 0; } void native_frndint(x64emu_t* emu) { ST0.d = fpu_round(emu, ST0.d); } void native_fscale(x64emu_t* emu) { #pragma STDC FENV_ACCESS ON if(ST0.d!=0.0) ST0.d = ldexp(ST0.d, trunc(ST1.d)); } void native_fsin(x64emu_t* emu) { #pragma STDC FENV_ACCESS ON // seems that sin of glib doesn't follow the rounding direction mode ST0.d = sin(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fcos(x64emu_t* emu) { #pragma STDC FENV_ACCESS ON // seems that cos of glib doesn't follow the rounding direction mode ST0.d = cos(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fbld(x64emu_t* emu, uint8_t* ed) { fpu_fbld(emu, ed); } void native_fild64(x64emu_t* emu, int64_t* ed) { int64_t tmp; memcpy(&tmp, ed, sizeof(tmp)); ST0.d = tmp; STll(0).sq = tmp; STll(0).sref = ST0.sq; } void native_fbstp(x64emu_t* emu, uint8_t* ed) { fpu_fbst(emu, ed); } void native_fistp64(x64emu_t* emu, int64_t* ed) { // used of memcpy to avoid aligments issues if(STll(0).sref==ST(0).sq) { memcpy(ed, &STll(0).sq, sizeof(int64_t)); } else { int64_t tmp; if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) tmp = 0x8000000000000000LL; else tmp = fpu_round(emu, ST0.d); memcpy(ed, &tmp, sizeof(tmp)); } } void native_fistt64(x64emu_t* emu, int64_t* ed) { // used of memcpy to avoid alignments issues int64_t tmp = ST0.d; memcpy(ed, &tmp, sizeof(tmp)); } void native_fld(x64emu_t* emu, uint8_t* ed) { memcpy(&STld(0).ld, ed, 10); LD2D(&STld(0), &ST(0).d); STld(0).uref = ST0.q; } void native_ud(x64emu_t* emu) { if(BOX64ENV(dynarec_test)) emu->test.test = 0; EmitSignal(emu, X64_SIGILL, (void*)R_RIP, 0); } void native_br(x64emu_t* emu) { if(BOX64ENV(dynarec_test)) emu->test.test = 0; EmitSignal(emu, X64_SIGSEGV, (void*)R_RIP, 0xb09d); } void native_priv(x64emu_t* emu) { emu->test.test = 0; EmitSignal(emu, X64_SIGSEGV, (void*)R_RIP, 0xbad0); } void native_gpf(x64emu_t* emu) { emu->test.test = 0; EmitSignal(emu, X64_SIGSEGV, (void*)R_RIP, 0xbad0); // same effect has private opcode? } void native_int(x64emu_t* emu, int num) { emu->test.test = 0; EmitInterruption(emu, num, (void*)R_RIP); } #ifndef _WIN32 void native_wineint(x64emu_t* emu, int num) { emu->test.test = 0; EmitWineInt(emu, num, (void*)R_RIP); } #endif void native_int3(x64emu_t* emu) { EmitSignal(emu, X64_SIGTRAP, NULL, 3); } void native_div0(x64emu_t* emu) { emu->test.test = 0; EmitDiv0(emu, (void*)R_RIP, 1); } void native_fsave(x64emu_t* emu, uint8_t* ed) { fpu_savenv(emu, (char*)ed, 0); uint8_t* p = ed; p += 28; for (int i=0; i<8; ++i) { LD2D(p, &emu->x87[7-i].d); p+=10; } reset_fpu(emu); } void native_fsave16(x64emu_t* emu, uint8_t* ed) { fpu_savenv(emu, (char*)ed, 1); uint8_t* p = ed; p += 14; for (int i=0; i<8; ++i) { LD2D(p, &emu->x87[7-i].d); p+=10; } reset_fpu(emu); } void native_frstor(x64emu_t* emu, uint8_t* ed) { fpu_loadenv(emu, (char*)ed, 0); uint8_t* p = ed; p += 28; for (int i=0; i<8; ++i) { D2LD(&emu->x87[7-i].d, p); p+=10; } } void native_frstor16(x64emu_t* emu, uint8_t* ed) { fpu_loadenv(emu, (char*)ed, 1); uint8_t* p = ed; p += 14; for (int i=0; i<8; ++i) { D2LD(&emu->x87[7-i].d, p); p+=10; } } void native_fprem1(x64emu_t* emu) { int e0, e1; int64_t ll = (int64_t)round(ST0.d / ST1.d); ST0.d = ST0.d - (ST1.d * ll); emu->sw.f.F87_C2 = 0; emu->sw.f.F87_C1 = (ll & 1) ? 1 : 0; emu->sw.f.F87_C3 = (ll & 2) ? 1 : 0; emu->sw.f.F87_C0 = (ll & 4) ? 1 : 0; } const uint8_t ff_mult2[4][256] = { // a = 0x0e 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, // a = 0x09 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46, // a = 0x0d 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97, // a = 0x0b 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3, }; const uint8_t ff_mult3[4][256] = { // a = 0x02 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5, // a = 0x01 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, // a = 0x01 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, // a = 0x03 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a, }; static uint8_t ff_mult(uint8_t a, uint8_t b) { int retval = 0; for(int i = 0; i < 8; i++) { if((b & 1) == 1) retval ^= a; if((a & 0x80)) { a <<= 1; a ^= 0x1b; } else { a <<= 1; } b >>= 1; } return retval; } void native_aesimc(x64emu_t* emu, int xmm) { sse_regs_t eax1 = emu->xmm[xmm]; for(int j=0; j<4; ++j) { // 0x0E -> 0, 0x09 -> 1, 0x0D -> 2, 0x0B -> 3 // emu->xmm[xmm].ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]); // emu->xmm[xmm].ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]); // emu->xmm[xmm].ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]); // emu->xmm[xmm].ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]); emu->xmm[xmm].ub[0+j*4] = ff_mult2[0][eax1.ub[0+j*4]] ^ ff_mult2[3][eax1.ub[1+j*4]] ^ ff_mult2[2][eax1.ub[2+j*4]] ^ ff_mult2[1][eax1.ub[3+j*4]]; emu->xmm[xmm].ub[1+j*4] = ff_mult2[1][eax1.ub[0+j*4]] ^ ff_mult2[0][eax1.ub[1+j*4]] ^ ff_mult2[3][eax1.ub[2+j*4]] ^ ff_mult2[2][eax1.ub[3+j*4]]; emu->xmm[xmm].ub[2+j*4] = ff_mult2[2][eax1.ub[0+j*4]] ^ ff_mult2[1][eax1.ub[1+j*4]] ^ ff_mult2[0][eax1.ub[2+j*4]] ^ ff_mult2[3][eax1.ub[3+j*4]]; emu->xmm[xmm].ub[3+j*4] = ff_mult2[3][eax1.ub[0+j*4]] ^ ff_mult2[2][eax1.ub[1+j*4]] ^ ff_mult2[1][eax1.ub[2+j*4]] ^ ff_mult2[0][eax1.ub[3+j*4]]; } } void native_aesimc_y(x64emu_t* emu, int ymm) { sse_regs_t eay1 = emu->ymm[ymm]; for(int j=0; j<4; ++j) { // emu->ymm[ymm].ub[0+j*4] = ff_mult(0x0E, eay1.ub[0+j*4]) ^ ff_mult(0x0B, eay1.ub[1+j*4]) ^ ff_mult(0x0D, eay1.ub[2+j*4]) ^ ff_mult(0x09, eay1.ub[3+j*4]); // emu->ymm[ymm].ub[1+j*4] = ff_mult(0x09, eay1.ub[0+j*4]) ^ ff_mult(0x0E, eay1.ub[1+j*4]) ^ ff_mult(0x0B, eay1.ub[2+j*4]) ^ ff_mult(0x0D, eay1.ub[3+j*4]); // emu->ymm[ymm].ub[2+j*4] = ff_mult(0x0D, eay1.ub[0+j*4]) ^ ff_mult(0x09, eay1.ub[1+j*4]) ^ ff_mult(0x0E, eay1.ub[2+j*4]) ^ ff_mult(0x0B, eay1.ub[3+j*4]); // emu->ymm[ymm].ub[3+j*4] = ff_mult(0x0B, eay1.ub[0+j*4]) ^ ff_mult(0x0D, eay1.ub[1+j*4]) ^ ff_mult(0x09, eay1.ub[2+j*4]) ^ ff_mult(0x0E, eay1.ub[3+j*4]); emu->ymm[ymm].ub[0+j*4] = ff_mult2[0][eay1.ub[0+j*4]] ^ ff_mult2[3][eay1.ub[1+j*4]] ^ ff_mult2[2][eay1.ub[2+j*4]] ^ ff_mult2[1][eay1.ub[3+j*4]]; emu->ymm[ymm].ub[1+j*4] = ff_mult2[1][eay1.ub[0+j*4]] ^ ff_mult2[0][eay1.ub[1+j*4]] ^ ff_mult2[3][eay1.ub[2+j*4]] ^ ff_mult2[2][eay1.ub[3+j*4]]; emu->ymm[ymm].ub[2+j*4] = ff_mult2[2][eay1.ub[0+j*4]] ^ ff_mult2[1][eay1.ub[1+j*4]] ^ ff_mult2[0][eay1.ub[2+j*4]] ^ ff_mult2[3][eay1.ub[3+j*4]]; emu->ymm[ymm].ub[3+j*4] = ff_mult2[3][eay1.ub[0+j*4]] ^ ff_mult2[2][eay1.ub[1+j*4]] ^ ff_mult2[1][eay1.ub[2+j*4]] ^ ff_mult2[0][eay1.ub[3+j*4]]; } } void native_aesmc(x64emu_t* emu, int xmm) { sse_regs_t eax1 = emu->xmm[xmm]; for(int j=0; j<4; ++j) { // 0x02 -> 0, 0x01 -> 1, 0x01 -> 2, 0x03 -> 3 // emu->xmm[xmm].ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^ eax1.ub[2+j*4] ^ eax1.ub[3+j*4] ; // emu->xmm[xmm].ub[1+j*4] = eax1.ub[0+j*4] ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^ eax1.ub[3+j*4] ; // emu->xmm[xmm].ub[2+j*4] = eax1.ub[0+j*4] ^ eax1.ub[1+j*4] ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]); // emu->xmm[xmm].ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^ eax1.ub[1+j*4] ^ eax1.ub[2+j*4] ^ ff_mult(0x02, eax1.ub[3+j*4]); emu->xmm[xmm].ub[0+j*4] = ff_mult3[0][eax1.ub[0+j*4]] ^ ff_mult3[3][eax1.ub[1+j*4]] ^ ff_mult3[2][eax1.ub[2+j*4]] ^ ff_mult3[1][eax1.ub[3+j*4]]; emu->xmm[xmm].ub[1+j*4] = ff_mult3[1][eax1.ub[0+j*4]] ^ ff_mult3[0][eax1.ub[1+j*4]] ^ ff_mult3[3][eax1.ub[2+j*4]] ^ ff_mult3[2][eax1.ub[3+j*4]]; emu->xmm[xmm].ub[2+j*4] = ff_mult3[2][eax1.ub[0+j*4]] ^ ff_mult3[1][eax1.ub[1+j*4]] ^ ff_mult3[0][eax1.ub[2+j*4]] ^ ff_mult3[3][eax1.ub[3+j*4]]; emu->xmm[xmm].ub[3+j*4] = ff_mult3[3][eax1.ub[0+j*4]] ^ ff_mult3[2][eax1.ub[1+j*4]] ^ ff_mult3[1][eax1.ub[2+j*4]] ^ ff_mult3[0][eax1.ub[3+j*4]]; } } void native_aesmc_y(x64emu_t* emu, int ymm) { sse_regs_t eay1 = emu->ymm[ymm]; for(int j=0; j<4; ++j) { // emu->ymm[ymm].ub[0+j*4] = ff_mult(0x02, eay1.ub[0+j*4]) ^ ff_mult(0x03, eay1.ub[1+j*4]) ^ eay1.ub[2+j*4] ^ eay1.ub[3+j*4] ; // emu->ymm[ymm].ub[1+j*4] = eay1.ub[0+j*4] ^ ff_mult(0x02, eay1.ub[1+j*4]) ^ ff_mult(0x03, eay1.ub[2+j*4]) ^ eay1.ub[3+j*4] ; // emu->ymm[ymm].ub[2+j*4] = eay1.ub[0+j*4] ^ eay1.ub[1+j*4] ^ ff_mult(0x02, eay1.ub[2+j*4]) ^ ff_mult(0x03, eay1.ub[3+j*4]); // emu->ymm[ymm].ub[3+j*4] = ff_mult(0x03, eay1.ub[0+j*4]) ^ eay1.ub[1+j*4] ^ eay1.ub[2+j*4] ^ ff_mult(0x02, eay1.ub[3+j*4]); emu->ymm[ymm].ub[0+j*4] = ff_mult3[0][eay1.ub[0+j*4]] ^ ff_mult3[3][eay1.ub[1+j*4]] ^ ff_mult3[2][eay1.ub[2+j*4]] ^ ff_mult3[1][eay1.ub[3+j*4]]; emu->ymm[ymm].ub[1+j*4] = ff_mult3[1][eay1.ub[0+j*4]] ^ ff_mult3[0][eay1.ub[1+j*4]] ^ ff_mult3[3][eay1.ub[2+j*4]] ^ ff_mult3[2][eay1.ub[3+j*4]]; emu->ymm[ymm].ub[2+j*4] = ff_mult3[2][eay1.ub[0+j*4]] ^ ff_mult3[1][eay1.ub[1+j*4]] ^ ff_mult3[0][eay1.ub[2+j*4]] ^ ff_mult3[3][eay1.ub[3+j*4]]; emu->ymm[ymm].ub[3+j*4] = ff_mult3[3][eay1.ub[0+j*4]] ^ ff_mult3[2][eay1.ub[1+j*4]] ^ ff_mult3[1][eay1.ub[2+j*4]] ^ ff_mult3[0][eay1.ub[3+j*4]]; } } // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf // A N K H E B O L I F C P M J G D const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3}; const uint8_t invsubbytes[256] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, }; void native_aesdlast(x64emu_t* emu, int xmm) { sse_regs_t eax1; for(int i=0; i<16; ++i) eax1.ub[i] = emu->xmm[xmm].ub[invshiftrows[i]]; //STATE ← InvSubBytes( STATE ); for(int i=0; i<16; ++i) emu->xmm[xmm].ub[i] = invsubbytes[eax1.ub[i]]; } void native_aesdlast_y(x64emu_t* emu, int ymm) { sse_regs_t eay1; for(int i=0; i<16; ++i) eay1.ub[i] = emu->ymm[ymm].ub[invshiftrows[i]]; //STATE ← InvSubBytes( STATE ); for(int i=0; i<16; ++i) emu->ymm[ymm].ub[i] = invsubbytes[eay1.ub[i]]; } const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11}; const uint8_t subbytes[256] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, }; void native_aeselast(x64emu_t* emu, int xmm) { // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf // A F K P E J O D I N C H M B G L sse_regs_t eax1; for(int i=0; i<16; ++i) eax1.ub[i] = emu->xmm[xmm].ub[shiftrows[i]]; //STATE ← SubBytes( STATE ); for(int i=0; i<16; ++i) emu->xmm[xmm].ub[i] = subbytes[eax1.ub[i]]; } void native_aeselast_y(x64emu_t* emu, int ymm) { sse_regs_t eay1; for(int i=0; i<16; ++i) eay1.ub[i] = emu->ymm[ymm].ub[shiftrows[i]]; for(int i=0; i<16; ++i) emu->ymm[ymm].ub[i] = subbytes[eay1.ub[i]]; } void native_aesd(x64emu_t* emu, int xmm) { native_aesdlast(emu, xmm); native_aesimc(emu, xmm); } void native_aesd_y(x64emu_t* emu, int ymm) { native_aesdlast_y(emu, ymm); native_aesimc_y(emu, ymm); } void native_aese(x64emu_t* emu, int xmm) { native_aeselast(emu, xmm); native_aesmc(emu, xmm); } void native_aese_y(x64emu_t* emu, int ymm) { native_aeselast_y(emu, ymm); native_aesmc_y(emu, ymm); } void native_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) { sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; sse_regs_t *GX = &emu->xmm[gx]; for (int i = 4; i < 8; ++i) GX->ub[i] = subbytes[EX->ub[i]]; for (int i = 12; i < 16; ++i) GX->ub[i] = subbytes[EX->ub[i]]; GX->ud[0] = GX->ud[1]; uint8_t tmp8u = GX->ub[4]; GX->ud[1] = GX->ud[1] >> 8; GX->ub[7] = tmp8u; GX->ud[1] ^= u8; GX->ud[2] = GX->ud[3]; tmp8u = GX->ub[12]; GX->ud[3] = GX->ud[3] >> 8; GX->ub[15] = tmp8u; GX->ud[3] ^= u8; } void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) { sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; sse_regs_t *GX = &emu->xmm[gx]; int g = (u8&1)?1:0; int e = (u8&0b10000)?1:0; __int128 result = 0; __int128 op2 = EX->q[e]; for (int i=0; i<64; ++i) if(GX->q[g]&(1LL<u128 = result; } void native_pclmul_x(x64emu_t* emu, int gx, int vx, void* p, uint32_t u8) { sse_regs_t *EX = ((uintptr_t)p>15)?((sse_regs_t*)p):&emu->xmm[(uintptr_t)p]; sse_regs_t *GX = &emu->xmm[gx]; sse_regs_t *VX = &emu->xmm[vx]; int g = (u8&1)?1:0; int e = (u8&0b10000)?1:0; __int128 result = 0; __int128 op2 = EX->q[e]; for (int i=0; i<64; ++i) if(VX->q[g]&(1LL<u128 = result; } void native_pclmul_y(x64emu_t* emu, int gy, int vy, void* p, uint32_t u8) { //compute both low and high values native_pclmul_x(emu, gy, vy, p, u8); sse_regs_t *EY = ((uintptr_t)p>15)?((sse_regs_t*)(p+16)):&emu->ymm[(uintptr_t)p]; sse_regs_t *GY = &emu->ymm[gy]; sse_regs_t *VY = &emu->ymm[vy]; int g = (u8&1)?1:0; int e = (u8&0b10000)?1:0; __int128 result = 0; __int128 op2 = EY->q[e]; for (int i=0; i<64; ++i) if(VY->q[g]&(1LL<u128 = result; } void native_clflush(x64emu_t* emu, void* p) { cleanDBFromAddressRange((uintptr_t)p, 8, 0); } static int flagsCacheNeedsTransform(dynarec_native_t* dyn, int ninst) { int jmp = dyn->insts[ninst].x64.jmp_insts; if(jmp<0) return 0; if(dyn->insts[ninst].f_exit.dfnone) // flags are fully known, nothing we can do more return 0; if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone && !dyn->insts[jmp].df_notneeded) return 1; switch (dyn->insts[jmp].f_entry.pending) { case SF_UNKNOWN: return 0; case SF_SET: if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING) return 1; else return 0; case SF_SET_PENDING: if(dyn->insts[ninst].f_exit.pending==SF_SET_PENDING) return 0; return 1; case SF_PENDING: if(dyn->insts[ninst].f_exit.pending==SF_PENDING || dyn->insts[ninst].f_exit.pending==SF_SET_PENDING) return 0; return (dyn->insts[jmp].f_entry.dfnone == dyn->insts[ninst].f_exit.dfnone)?0:1; } return 0; } int CacheNeedsTransform(dynarec_native_t* dyn, int ninst) { int ret = 0; if (flagsCacheNeedsTransform(dyn, ninst)) ret|=1; OTHER_CACHE() return ret; } int isPred(dynarec_native_t* dyn, int ninst, int pred) { for(int i=0; iinsts[ninst].pred_sz; ++i) if(dyn->insts[ninst].pred[i]==pred) return pred; return -1; } int getNominalPred(dynarec_native_t* dyn, int ninst) { if((ninst<=0) || !dyn->insts[ninst].pred_sz) return -1; if(isPred(dyn, ninst, ninst-1)!=-1) return ninst-1; return dyn->insts[ninst].pred[0]; } #define F8 *(uint8_t*)(addr++) // Do the GETED, but don't emit anything... uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) { (void)dyn; (void)addr; (void)ninst; if((nextop&0xC0)==0xC0) return addr; if(!(nextop&0xC0)) { if((nextop&7)==4) { uint8_t sib = F8; if((sib&0x7)==5) { addr+=4; } } else if((nextop&7)==5) { addr+=4; } } else { if((nextop&7)==4) { ++addr; } if(nextop&0x80) { addr+=4; } else { ++addr; } } return addr; } // return Ib on a mod/rm opcode without emitting anything uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) { addr = fakeed(dyn, addr, ninst, nextop); return F8; } #undef F8 void propagate_nodf(dynarec_native_t* dyn, int ninst) { while(ninst>=0) { if(dyn->insts[ninst].df_notneeded) return; // already flagged if(dyn->insts[ninst].x64.gen_flags || dyn->insts[ninst].x64.use_flags) return; // flags are use, so maybe it's needed dyn->insts[ninst].df_notneeded = 1; --ninst; } } void x64disas_add_register_mapping_annotations(char* buf, const char* disas, const register_mapping_t* mappings, size_t mappings_sz) { static char tmp[32]; tmp[0] = '\0'; int len = 0; // skip the mnemonic char* p = strchr(disas, ' '); if (!p) { sprintf(buf, "%s", disas); return; } p++; // skip the space while (*p) { while (*p && !(*p >= 'a' && *p <= 'e') && *p != 's' && *p != 'r') // skip non-register characters p++; if (!*p) break; for (int i = 0; i < mappings_sz; ++i) { if (!strncmp(p, mappings[i].name, strlen(mappings[i].name))) { len += sprintf(tmp + len, " %s,", mappings[i].native); p += strlen(mappings[i].name) - 1; break; } } p++; } if (tmp[0]) tmp[strlen(tmp) - 1] = '\0'; sprintf(buf, "%-35s ;%s", disas, tmp); }