about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-12-02 10:50:21 +0100
committer GitHub <noreply@github.com> 2024-12-02 10:50:21 +0100
commit 83ed8ebb22da6d77f8f15af43bf316bd14e93ed3 (patch)
tree 209f87e967585d29b49af00c191c2e4a30bf976e
parent 88aab487f111ba280048fd7e240e2b62d45eeba8 (diff)
download box64-83ed8ebb22da6d77f8f15af43bf316bd14e93ed3.tar.gz
box64-83ed8ebb22da6d77f8f15af43bf316bd14e93ed3.zip
Android Long Double handling (#2101)
* [ANDROID] Try to handle LongDouble in vaarg correctly

* [ANDROID] Try to enable test07

* [WRAPPED] Fixed Android long double handling (#2096)

* [WRAPPED] Fixed Android long double handling

* [WRAPPED] Force casting to uintptr_t

* [WRAPPED] Fixed a typing issue, removed a warning in the CI

---------

Co-authored-by: rajdakin <rajdakin@gmail.com>
-rw-r--r-- CMakeLists.txt 11
-rwxr-xr-x rebuild_wrappers.py 88
-rw-r--r-- src/include/complext.h 6
-rw-r--r-- src/libtools/myalign.c 58
-rw-r--r-- src/wrapped/generated/wrapper.c 81
5 files changed, 202 insertions, 42 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e2199a2e..075c91c2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -266,6 +266,9 @@ endif()
 if(NOGIT)
     add_definitions(-DNOGIT)
 endif()
+if(CI)
+    add_definitions(-Wno-pointer-type-mismatch)
+endif()
 
 if(HAVE_TRACE)
     set(BOX64 box64)
@@ -1485,10 +1488,10 @@ add_test(threadsStart ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX
     -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref06.txt
     -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
-#add_test(trig ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-#    -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test07_android -D TEST_OUTPUT=tmpfile07.txt
-#    -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref07.txt
-#    -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
+add_test(trig ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
+    -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test07_android -D TEST_OUTPUT=tmpfile07.txt
+    -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref07.txt
+    -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
 add_test(pi ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
     -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test08_android -D TEST_OUTPUT=tmpfile08.txt
diff --git a/rebuild_wrappers.py b/rebuild_wrappers.py
index d4a8f401..c14dd3a2 100755
--- a/rebuild_wrappers.py
+++ b/rebuild_wrappers.py
@@ -1095,7 +1095,7 @@ def main(root: str, files: Iterable[Filename], ver: str):
 			file.write("typedef " + td_types[v.get_convention().ident][v.get_convention().values.index(v[0])] + " (*" + name + ")"
 				+ "(" + ', '.join(td_types[v.get_convention().ident][v.get_convention().values.index(t)] for t in v[2:]) + ");\n")
 		if any_depends_on_ld:
-			file.write("\n#ifdef HAVE_LD80BITS\n")
+			file.write("\n#if defined(HAVE_LD80BITS) || defined(ANDROID)\n")
 			for v in arr:
 				if all(c not in v for c in depends_on_ld):
 					continue
@@ -1103,7 +1103,7 @@ def main(root: str, files: Iterable[Filename], ver: str):
 				v = v[:-1] if v.endswith('NN') else v # FIXME
 				file.write("typedef " + td_types[v.get_convention().ident][v.get_convention().values.index(v[0])] + " (*" + name + ")"
 					+ "(" + ', '.join(td_types[v.get_convention().ident][v.get_convention().values.index(t)] for t in v[2:]) + ");\n")
-			file.write("#else // HAVE_LD80BITS\n")
+			file.write("#else // !HAVE_LD80BITS && !ANDROID\n")
 			for k in td_types_nold:
 				for t in td_types_nold[k]:
 					td_types[k][conventions[k].values.index(t)] = td_types_nold[k][t]
@@ -1197,6 +1197,13 @@ def main(root: str, files: Iterable[Filename], ver: str):
 			},
 			conventions['W']: {}
 		}
+		vals_android = {
+			conventions['F']: {
+				"D": "long double ld=fn({0}); emu->xmm[0].u128=*(__uint128_t*)&ld;",
+				"Y": "from_complexl(emu, fn({0}));",
+			},
+			conventions['W']: {}
+		}
 		vals_ld = {
 			k: {t: vals[k][k.values.index(t)] for t in vals_nold[k]} for k in vals_nold
 		}
@@ -1333,9 +1340,24 @@ def main(root: str, files: Iterable[Filename], ver: str):
 			'D': "FromLD((void*)(R_RSP + {p})), ",      # K
 			'Y': "to_complexk(emu, R_RSP + {p}), ",     # y
 		}
+		arg_s_android = {
+			'D': "*(long double*)(R_RSP + {p})], ",
+			'Y': "to_complexl(emu, R_RSP + {p}), ",
+		}
 		arg_s_ld = {
 			t: arg_s[conventions['F'].values.index(t)] for t in arg_s_nold
 		}
+		vxmm_noandroid = vxmm[:]
+		vxmm_android = vxmm[:]
+		vxmm_android[conventions['F'].values.index('D')] = 1
+		vxmm_android[conventions['F'].values.index('Y')] = 1
+		arg_x_android = {
+			'D': "*(long double*)&emu->xmm[{p}], ",
+			'Y': "to_complexl(emu, (uintptr_t)&emu->xmm[{p}]), ",
+		}
+		arg_x_ld = {
+			t: arg_x[conventions['F'].values.index(t)] for t in arg_x_android
+		}
 		
 		# Asserts
 		for k in conventions:
@@ -1539,7 +1561,15 @@ def main(root: str, files: Iterable[Filename], ver: str):
 				else:
 					function_writer(file, v, v + "_t")
 			if any_depends_on_ld:
-				file.write("\n#ifdef HAVE_LD80BITS\n")
+				file.write("\n#if defined(ANDROID)\n")
+				for c in vals_android:
+					for t in vals_android[c]:
+						vals[c][c.values.index(t)] = vals_android[c][t]
+				for t in arg_s_android:
+					arg_s[conventions['F'].values.index(t)] = arg_s_android[t]
+				for t in arg_x_android:
+					arg_x[conventions['F'].values.index(t)] = arg_x_android[t]
+				vxmm = vxmm_android
 				for v in gbls[k]:
 					if all(c not in v for c in depends_on_ld):
 						continue
@@ -1548,12 +1578,20 @@ def main(root: str, files: Iterable[Filename], ver: str):
 						file.write("void vFv(x64emu_t *emu, uintptr_t fcn) { vFv_t fn = (vFv_t)fcn; fn(); (void)emu; }\n")
 					else:
 						function_writer(file, v, v + "_t")
-				file.write("#else // HAVE_LD80BITS\n")
+				file.write("#elif !defined(HAVE_LD80BITS)\n")
+				for c in vals_android:
+					for t in vals_ld[c]:
+						vals[c][c.values.index(t)] = vals_ld[c][t]
 				for c in vals_nold:
 					for t in vals_nold[c]:
 						vals[c][c.values.index(t)] = vals_nold[c][t]
+				for t in arg_s_android:
+					arg_s[conventions['F'].values.index(t)] = arg_s_ld[t]
 				for t in arg_s_nold:
 					arg_s[conventions['F'].values.index(t)] = arg_s_nold[t]
+				for t in arg_x_android:
+					arg_x[conventions['F'].values.index(t)] = arg_x_ld[t]
+				vxmm = vxmm_noandroid
 				for v in gbls[k]:
 					if all(c not in v for c in depends_on_ld):
 						continue
@@ -1563,10 +1601,19 @@ def main(root: str, files: Iterable[Filename], ver: str):
 					else:
 						function_writer(file, v, v + "_t")
 				for c in vals_nold:
-					for t in vals_ld[c]:
+					for t in vals_nold[c]:
 						vals[c][c.values.index(t)] = vals_ld[c][t]
 				for t in arg_s_nold:
-					arg_s[conventions['F'].values.index(t)] = arg_s_nold[t]
+					arg_s[conventions['F'].values.index(t)] = arg_s_ld[t]
+				file.write("#else // defined(HAVE_LD80BITS) && !defined(ANDROID)\n")
+				for v in gbls[k]:
+					if all(c not in v for c in depends_on_ld):
+						continue
+					if v == FunctionType("vFv"):
+						# Suppress all warnings...
+						file.write("void vFv(x64emu_t *emu, uintptr_t fcn) { vFv_t fn = (vFv_t)fcn; fn(); (void)emu; }\n")
+					else:
+						function_writer(file, v, v + "_t")
 				file.write("#endif\n")
 			if k != str(Clauses()):
 				file.write("#endif\n")
@@ -1581,26 +1628,47 @@ def main(root: str, files: Iterable[Filename], ver: str):
 					continue
 				function_writer(file, vr, vf + "_t")
 			if any_depends_on_ld:
-				file.write("\n#ifdef HAVE_LD80BITS\n")
+				file.write("\n#if defined(ANDROID)\n")
+				for c in vals_android:
+					for t in vals_android[c]:
+						vals[c][c.values.index(t)] = vals_android[c][t]
+				for t in arg_s_android:
+					arg_s[conventions['F'].values.index(t)] = arg_s_android[t]
+				for t in arg_x_android:
+					arg_x[conventions['F'].values.index(t)] = arg_x_android[t]
+				vxmm = vxmm_android
 				for vr, vf in redirects[k]:
 					if all(c not in vr for c in depends_on_ld):
 						continue
 					function_writer(file, vr, vf + "_t")
-				file.write("#else // HAVE_LD80BITS\n")
+				file.write("#elif !defined(HAVE_LD80BITS)\n")
+				for c in vals_android:
+					for t in vals_ld[c]:
+						vals[c][c.values.index(t)] = vals_ld[c][t]
 				for c in vals_nold:
 					for t in vals_nold[c]:
 						vals[c][c.values.index(t)] = vals_nold[c][t]
+				for t in arg_s_android:
+					arg_s[conventions['F'].values.index(t)] = arg_s_ld[t]
 				for t in arg_s_nold:
 					arg_s[conventions['F'].values.index(t)] = arg_s_nold[t]
+				for t in arg_x_android:
+					arg_x[conventions['F'].values.index(t)] = arg_x_ld[t]
+				vxmm = vxmm_noandroid
 				for vr, vf in redirects[k]:
 					if all(c not in vr for c in depends_on_ld):
 						continue
 					function_writer(file, vr, vf + "_t")
 				for c in vals_nold:
-					for t in vals_ld[c]:
+					for t in vals_nold[c]:
 						vals[c][c.values.index(t)] = vals_ld[c][t]
 				for t in arg_s_nold:
-					arg_s[conventions['F'].values.index(t)] = arg_s_nold[t]
+					arg_s[conventions['F'].values.index(t)] = arg_s_ld[t]
+				file.write("#else // defined(HAVE_LD80BITS) && !defined(ANDROID)\n")
+				for vr, vf in redirects[k]:
+					if all(c not in vr for c in depends_on_ld):
+						continue
+					function_writer(file, vr, vf + "_t")
 				file.write("#endif\n")
 			if k != str(Clauses()):
 				file.write("#endif\n")
diff --git a/src/include/complext.h b/src/include/complext.h
index 695bb876..27336ca8 100644
--- a/src/include/complext.h
+++ b/src/include/complext.h
@@ -45,12 +45,18 @@ static inline void from_complex(x64emu_t* emu, complex_t v) {
     emu->xmm[0].d[0]=v.r; 
     emu->xmm[1].d[0]=v.i;
 }
+#ifdef ANDROID
+static inline void from_complexl(x64emu_t* emu, complexl_t v) {
+    memcpy(&emu->xmm[0], &v, 16*2); // what if AVX is present?
+}
+#else
 static inline void from_complexl(x64emu_t* emu, complexl_t v) {
     fpu_do_push(emu);
     fpu_do_push(emu);
     ST0.d=FromLD(&v.r); 
     ST(1).d=FromLD(&v.i);
 }
+#endif
 static inline void from_complexk(x64emu_t* emu, complex_t v) {
     fpu_do_push(emu);
     fpu_do_push(emu);
diff --git a/src/libtools/myalign.c b/src/libtools/myalign.c
index d34e0b8e..599ce904 100644
--- a/src/libtools/myalign.c
+++ b/src/libtools/myalign.c
@@ -123,6 +123,21 @@ void myStackAlign(x64emu_t* emu, const char* fmt, uint64_t* st, uint64_t* mystac
                     mystack++;
                 memcpy(mystack, st, 16);
                 st+=2; mystack+=2;
+                #elif defined(ANDROID)
+                // there is 128bits long double on native and x64 side
+                // need to align stacks to 128bits boundaries
+                if((((uintptr_t)mystack)&0xf)!=0)
+                    mystack++;
+                if(xmm) {
+                    memcpy(mystack, &emu->xmm[x++], 16);
+                    st+=2; mystack+=2;
+                    --xmm;
+                } else {
+                    if((((uintptr_t)st)&0xf)!=0)
+                        st++;
+                    memcpy(mystack, st, 16);
+                    st+=2; mystack+=2;
+                }
                 #else
                 // there is 128bits long double on ARM64, but they need 128bit alignment
                 if((((uintptr_t)mystack)&0xf)!=0)
@@ -366,6 +381,21 @@ void myStackAlignW(x64emu_t* emu, const char* fmt, uint64_t* st, uint64_t* mysta
                     mystack++;
                 memcpy(mystack, st, 16);
                 st+=2; mystack+=2;
+                #elif defined(ANDROID)
+                // there is 128bits long double on native and x64 side
+                // need to align stacks to 128bits boundaries
+                if((((uintptr_t)mystack)&0xf)!=0)
+                    mystack++;
+                if(xmm) {
+                    memcpy(mystack, &emu->xmm[x++], 16);
+                    st+=2; mystack+=2;
+                    --xmm;
+                } else {
+                    if((((uintptr_t)st)&0xf)!=0)
+                        st++;
+                    memcpy(mystack, st, 16);
+                    st+=2; mystack+=2;
+                }
                 #else
                 // there is 128bits long double on ARM64, but they need 128bit alignment
                 if((((uintptr_t)mystack)&0xf)!=0)
@@ -743,6 +773,20 @@ void myStackAlignValist(x64emu_t* emu, const char* fmt, uint64_t* mystack, x64_v
                     mystack++;
                 memcpy(mystack, st, 16);
                 st+=2; mystack+=2;
+                #elif defined(ANDROID)
+                // there is 128bits long double on native and x64 side
+                // need to align stacks to 128bits boundaries
+                if((((uintptr_t)mystack)&0xf)!=0)
+                    mystack++;
+                if(fprs<X64_VA_MAX_XMM) {
+                    memcpy(mystack, &area[fprs/8], 16);
+                    fprs+=16; mystack+=2;
+                } else {
+                    if((((uintptr_t)st)&0xf)!=0)
+                        st++;
+                    memcpy(mystack, st, 16);
+                    st+=2; mystack+=2;
+                }
                 #else
                 // there is 128bits long double on ARM64, but they need 128bit alignment
                 if((((uintptr_t)mystack)&0xf)!=0)
@@ -888,6 +932,20 @@ void myStackAlignWValist(x64emu_t* emu, const char* fmt, uint64_t* mystack, x64_
                     mystack++;
                 memcpy(mystack, st, 16);
                 st+=2; mystack+=2;
+                #elif defined(ANDROID)
+                // there is 128bits long double on native and x64 side
+                // need to align stacks to 128bits boundaries
+                if((((uintptr_t)mystack)&0xf)!=0)
+                    mystack++;
+                if(fprs<X64_VA_MAX_XMM) {
+                    memcpy(mystack, &area[fprs/8], 16);
+                    fprs+=16; mystack+=2;
+                } else {
+                    if((((uintptr_t)st)&0xf)!=0)
+                        st++;
+                    memcpy(mystack, st, 16);
+                    st+=2; mystack+=2;
+                }
                 #else
                 // there is 128bits long double on ARM64, but they need 128bit alignment
                 if((((uintptr_t)mystack)&0xf)!=0)
diff --git a/src/wrapped/generated/wrapper.c b/src/wrapped/generated/wrapper.c
index 92ffd295..b7e3730a 100644
--- a/src/wrapped/generated/wrapper.c
+++ b/src/wrapped/generated/wrapper.c
@@ -3392,7 +3392,7 @@ typedef void (*vFpppppppppppppppppppppppp_t)(void*, void*, void*, void*, void*,
 typedef void* (*pFpupuupppppppppppppppppppp_t)(void*, uint32_t, void*, uint32_t, uint32_t, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*);
 typedef int32_t (*iFpppppppppppppppppppppppppppppppppp_t)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*);
 
-#ifdef HAVE_LD80BITS
+#if defined(HAVE_LD80BITS) || defined(ANDROID)
 typedef int32_t (*iFD_t)(long double);
 typedef long double (*DFD_t)(long double);
 typedef long double (*DFY_t)(complexl_t);
@@ -3417,7 +3417,7 @@ typedef int32_t (*iFpLpD_t)(void*, uintptr_t, void*, long double);
 typedef void* (*pFDipp_t)(long double, int32_t, void*, void*);
 typedef int32_t (*iFDipppL_t)(long double, int32_t, void*, void*, void*, uintptr_t);
 typedef void (*vFppippDpDc_t)(void*, void*, int32_t, void*, void*, long double, void*, long double, int8_t);
-#else // HAVE_LD80BITS
+#else // !HAVE_LD80BITS && !ANDROID
 typedef int32_t (*iFD_t)(double);
 typedef double (*DFD_t)(double);
 typedef double (*DFY_t)(complex_t);
@@ -6811,32 +6811,32 @@ void vFpppppppppppppppppppppppp(x64emu_t *emu, uintptr_t fcn) { vFpppppppppppppp
 void pFpupuupppppppppppppppppppp(x64emu_t *emu, uintptr_t fcn) { pFpupuupppppppppppppppppppp_t fn = (pFpupuupppppppppppppppppppp_t)fcn; R_RAX=(uintptr_t)fn((void*)R_RDI, (uint32_t)R_RSI, (void*)R_RDX, (uint32_t)R_RCX, (uint32_t)R_R8, (void*)R_R9, *(void**)(R_RSP + 8), *(void**)(R_RSP + 16), *(void**)(R_RSP + 24), *(void**)(R_RSP + 32), *(void**)(R_RSP + 40), *(void**)(R_RSP + 48), *(void**)(R_RSP + 56), *(void**)(R_RSP + 64), *(void**)(R_RSP + 72), *(void**)(R_RSP + 80), *(void**)(R_RSP + 88), *(void**)(R_RSP + 96), *(void**)(R_RSP + 104), *(void**)(R_RSP + 112), *(void**)(R_RSP + 120), *(void**)(R_RSP + 128), *(void**)(R_RSP + 136), *(void**)(R_RSP + 144), *(void**)(R_RSP + 152)); }
 void iFpppppppppppppppppppppppppppppppppp(x64emu_t *emu, uintptr_t fcn) { iFpppppppppppppppppppppppppppppppppp_t fn = (iFpppppppppppppppppppppppppppppppppp_t)fcn; R_RAX=(uint32_t)fn((void*)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_RCX, (void*)R_R8, (void*)R_R9, *(void**)(R_RSP + 8), *(void**)(R_RSP + 16), *(void**)(R_RSP + 24), *(void**)(R_RSP + 32), *(void**)(R_RSP + 40), *(void**)(R_RSP + 48), *(void**)(R_RSP + 56), *(void**)(R_RSP + 64), *(void**)(R_RSP + 72), *(void**)(R_RSP + 80), *(void**)(R_RSP + 88), *(void**)(R_RSP + 96), *(void**)(R_RSP + 104), *(void**)(R_RSP + 112), *(void**)(R_RSP + 120), *(void**)(R_RSP + 128), *(void**)(R_RSP + 136), *(void**)(R_RSP + 144), *(void**)(R_RSP + 152), *(void**)(R_RSP + 160), *(void**)(R_RSP + 168), *(void**)(R_RSP + 176), *(void**)(R_RSP + 184), *(void**)(R_RSP + 192), *(void**)(R_RSP + 200), *(void**)(R_RSP + 208), *(void**)(R_RSP + 216), *(void**)(R_RSP + 224)); }
 
-#ifdef HAVE_LD80BITS
-void iFD(x64emu_t *emu, uintptr_t fcn) { iFD_t fn = (iFD_t)fcn; R_RAX=(uint32_t)fn(LD2localLD((void*)(R_RSP + 8))); }
-void DFD(x64emu_t *emu, uintptr_t fcn) { DFD_t fn = (DFD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = ld; }
-void DFY(x64emu_t *emu, uintptr_t fcn) { DFY_t fn = (DFY_t)fcn; long double ld=fn(to_complexl(emu, R_RSP + 8)); fpu_do_push(emu); ST0val = ld; }
-void lFD(x64emu_t *emu, uintptr_t fcn) { lFD_t fn = (lFD_t)fcn; R_RAX=(intptr_t)fn(LD2localLD((void*)(R_RSP + 8))); }
-void YFY(x64emu_t *emu, uintptr_t fcn) { YFY_t fn = (YFY_t)fcn; from_complexl(emu, fn(to_complexl(emu, R_RSP + 8))); }
-void IFED(x64emu_t *emu, uintptr_t fcn) { IFED_t fn = (IFED_t)fcn; S_RAX=(int64_t)fn(emu, LD2localLD((void*)(R_RSP + 8))); }
-void fFfD(x64emu_t *emu, uintptr_t fcn) { fFfD_t fn = (fFfD_t)fcn; emu->xmm[0].f[0]=fn(emu->xmm[0].f[0], LD2localLD((void*)(R_RSP + 8))); }
-void dFdD(x64emu_t *emu, uintptr_t fcn) { dFdD_t fn = (dFdD_t)fcn; emu->xmm[0].d[0]=fn(emu->xmm[0].d[0], LD2localLD((void*)(R_RSP + 8))); }
-void DFiD(x64emu_t *emu, uintptr_t fcn) { DFiD_t fn = (DFiD_t)fcn; long double ld=fn((int32_t)R_RDI, LD2localLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = ld; }
-void DFDi(x64emu_t *emu, uintptr_t fcn) { DFDi_t fn = (DFDi_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI); fpu_do_push(emu); ST0val = ld; }
-void DFDD(x64emu_t *emu, uintptr_t fcn) { DFDD_t fn = (DFDD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), LD2localLD((void*)(R_RSP + 24))); fpu_do_push(emu); ST0val = ld; }
-void DFDp(x64emu_t *emu, uintptr_t fcn) { DFDp_t fn = (DFDp_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), (void*)R_RDI); fpu_do_push(emu); ST0val = ld; }
-void DFpp(x64emu_t *emu, uintptr_t fcn) { DFpp_t fn = (DFpp_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI); fpu_do_push(emu); ST0val = ld; }
-void lFDD(x64emu_t *emu, uintptr_t fcn) { lFDD_t fn = (lFDD_t)fcn; R_RAX=(intptr_t)fn(LD2localLD((void*)(R_RSP + 8)), LD2localLD((void*)(R_RSP + 24))); }
-void YFYY(x64emu_t *emu, uintptr_t fcn) { YFYY_t fn = (YFYY_t)fcn; from_complexl(emu, fn(to_complexl(emu, R_RSP + 8), to_complexl(emu, R_RSP + 40))); }
-void vFDpp(x64emu_t *emu, uintptr_t fcn) { vFDpp_t fn = (vFDpp_t)fcn; fn(LD2localLD((void*)(R_RSP + 8)), (void*)R_RDI, (void*)R_RSI); }
-void DFDDD(x64emu_t *emu, uintptr_t fcn) { DFDDD_t fn = (DFDDD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), LD2localLD((void*)(R_RSP + 24)), LD2localLD((void*)(R_RSP + 40))); fpu_do_push(emu); ST0val = ld; }
-void DFppi(x64emu_t *emu, uintptr_t fcn) { DFppi_t fn = (DFppi_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX); fpu_do_push(emu); ST0val = ld; }
-void DFppp(x64emu_t *emu, uintptr_t fcn) { DFppp_t fn = (DFppp_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI, (void*)R_RDX); fpu_do_push(emu); ST0val = ld; }
-void pFDip(x64emu_t *emu, uintptr_t fcn) { pFDip_t fn = (pFDip_t)fcn; R_RAX=(uintptr_t)fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI); }
-void iFpLpD(x64emu_t *emu, uintptr_t fcn) { iFpLpD_t fn = (iFpLpD_t)fcn; R_RAX=(uint32_t)fn((void*)R_RDI, (uintptr_t)R_RSI, (void*)R_RDX, LD2localLD((void*)(R_RSP + 8))); }
-void pFDipp(x64emu_t *emu, uintptr_t fcn) { pFDipp_t fn = (pFDipp_t)fcn; R_RAX=(uintptr_t)fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX); }
-void iFDipppL(x64emu_t *emu, uintptr_t fcn) { iFDipppL_t fn = (iFDipppL_t)fcn; R_RAX=(uint32_t)fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_RCX, (uintptr_t)R_R8); }
-void vFppippDpDc(x64emu_t *emu, uintptr_t fcn) { vFppippDpDc_t fn = (vFppippDpDc_t)fcn; fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX, (void*)R_RCX, (void*)R_R8, LD2localLD((void*)(R_RSP + 8)), (void*)R_R9, LD2localLD((void*)(R_RSP + 24)), *(int8_t*)(R_RSP + 40)); }
-#else // HAVE_LD80BITS
+#if defined(ANDROID)
+void iFD(x64emu_t *emu, uintptr_t fcn) { iFD_t fn = (iFD_t)fcn; R_RAX=(uint32_t)fn(*(long double*)&emu->xmm[0]); }
+void DFD(x64emu_t *emu, uintptr_t fcn) { DFD_t fn = (DFD_t)fcn; long double ld=fn(*(long double*)&emu->xmm[0]); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFY(x64emu_t *emu, uintptr_t fcn) { DFY_t fn = (DFY_t)fcn; long double ld=fn(to_complexl(emu, (uintptr_t)&emu->xmm[0])); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void lFD(x64emu_t *emu, uintptr_t fcn) { lFD_t fn = (lFD_t)fcn; R_RAX=(intptr_t)fn(*(long double*)&emu->xmm[0]); }
+void YFY(x64emu_t *emu, uintptr_t fcn) { YFY_t fn = (YFY_t)fcn; from_complexl(emu, fn(to_complexl(emu, (uintptr_t)&emu->xmm[0]))); }
+void IFED(x64emu_t *emu, uintptr_t fcn) { IFED_t fn = (IFED_t)fcn; S_RAX=(int64_t)fn(emu, *(long double*)&emu->xmm[0]); }
+void fFfD(x64emu_t *emu, uintptr_t fcn) { fFfD_t fn = (fFfD_t)fcn; emu->xmm[0].f[0]=fn(emu->xmm[0].f[0], *(long double*)&emu->xmm[1]); }
+void dFdD(x64emu_t *emu, uintptr_t fcn) { dFdD_t fn = (dFdD_t)fcn; emu->xmm[0].d[0]=fn(emu->xmm[0].d[0], *(long double*)&emu->xmm[1]); }
+void DFiD(x64emu_t *emu, uintptr_t fcn) { DFiD_t fn = (DFiD_t)fcn; long double ld=fn((int32_t)R_RDI, *(long double*)&emu->xmm[0]); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFDi(x64emu_t *emu, uintptr_t fcn) { DFDi_t fn = (DFDi_t)fcn; long double ld=fn(*(long double*)&emu->xmm[0], (int32_t)R_RDI); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFDD(x64emu_t *emu, uintptr_t fcn) { DFDD_t fn = (DFDD_t)fcn; long double ld=fn(*(long double*)&emu->xmm[0], *(long double*)&emu->xmm[1]); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFDp(x64emu_t *emu, uintptr_t fcn) { DFDp_t fn = (DFDp_t)fcn; long double ld=fn(*(long double*)&emu->xmm[0], (void*)R_RDI); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFpp(x64emu_t *emu, uintptr_t fcn) { DFpp_t fn = (DFpp_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void lFDD(x64emu_t *emu, uintptr_t fcn) { lFDD_t fn = (lFDD_t)fcn; R_RAX=(intptr_t)fn(*(long double*)&emu->xmm[0], *(long double*)&emu->xmm[1]); }
+void YFYY(x64emu_t *emu, uintptr_t fcn) { YFYY_t fn = (YFYY_t)fcn; from_complexl(emu, fn(to_complexl(emu, (uintptr_t)&emu->xmm[0]), to_complexl(emu, (uintptr_t)&emu->xmm[1]))); }
+void vFDpp(x64emu_t *emu, uintptr_t fcn) { vFDpp_t fn = (vFDpp_t)fcn; fn(*(long double*)&emu->xmm[0], (void*)R_RDI, (void*)R_RSI); }
+void DFDDD(x64emu_t *emu, uintptr_t fcn) { DFDDD_t fn = (DFDDD_t)fcn; long double ld=fn(*(long double*)&emu->xmm[0], *(long double*)&emu->xmm[1], *(long double*)&emu->xmm[2]); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFppi(x64emu_t *emu, uintptr_t fcn) { DFppi_t fn = (DFppi_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void DFppp(x64emu_t *emu, uintptr_t fcn) { DFppp_t fn = (DFppp_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI, (void*)R_RDX); emu->xmm[0].u128=*(__uint128_t*)&ld; }
+void pFDip(x64emu_t *emu, uintptr_t fcn) { pFDip_t fn = (pFDip_t)fcn; R_RAX=(uintptr_t)fn(*(long double*)&emu->xmm[0], (int32_t)R_RDI, (void*)R_RSI); }
+void iFpLpD(x64emu_t *emu, uintptr_t fcn) { iFpLpD_t fn = (iFpLpD_t)fcn; R_RAX=(uint32_t)fn((void*)R_RDI, (uintptr_t)R_RSI, (void*)R_RDX, *(long double*)&emu->xmm[0]); }
+void pFDipp(x64emu_t *emu, uintptr_t fcn) { pFDipp_t fn = (pFDipp_t)fcn; R_RAX=(uintptr_t)fn(*(long double*)&emu->xmm[0], (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX); }
+void iFDipppL(x64emu_t *emu, uintptr_t fcn) { iFDipppL_t fn = (iFDipppL_t)fcn; R_RAX=(uint32_t)fn(*(long double*)&emu->xmm[0], (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_RCX, (uintptr_t)R_R8); }
+void vFppippDpDc(x64emu_t *emu, uintptr_t fcn) { vFppippDpDc_t fn = (vFppippDpDc_t)fcn; fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX, (void*)R_RCX, (void*)R_R8, *(long double*)&emu->xmm[0], (void*)R_R9, *(long double*)&emu->xmm[1], *(int8_t*)(R_RSP + 8)); }
+#elif !defined(HAVE_LD80BITS)
 void iFD(x64emu_t *emu, uintptr_t fcn) { iFD_t fn = (iFD_t)fcn; R_RAX=(uint32_t)fn(FromLD((void*)(R_RSP + 8))); }
 void DFD(x64emu_t *emu, uintptr_t fcn) { DFD_t fn = (DFD_t)fcn; double db=fn(FromLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = db; }
 void DFY(x64emu_t *emu, uintptr_t fcn) { DFY_t fn = (DFY_t)fcn; double db=fn(to_complexk(emu, R_RSP + 8)); fpu_do_push(emu); ST0val = db; }
@@ -6861,6 +6861,31 @@ void iFpLpD(x64emu_t *emu, uintptr_t fcn) { iFpLpD_t fn = (iFpLpD_t)fcn; R_RAX=(
 void pFDipp(x64emu_t *emu, uintptr_t fcn) { pFDipp_t fn = (pFDipp_t)fcn; R_RAX=(uintptr_t)fn(FromLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX); }
 void iFDipppL(x64emu_t *emu, uintptr_t fcn) { iFDipppL_t fn = (iFDipppL_t)fcn; R_RAX=(uint32_t)fn(FromLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_RCX, (uintptr_t)R_R8); }
 void vFppippDpDc(x64emu_t *emu, uintptr_t fcn) { vFppippDpDc_t fn = (vFppippDpDc_t)fcn; fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX, (void*)R_RCX, (void*)R_R8, FromLD((void*)(R_RSP + 8)), (void*)R_R9, FromLD((void*)(R_RSP + 24)), *(int8_t*)(R_RSP + 40)); }
+#else // defined(HAVE_LD80BITS) && !defined(ANDROID)
+void iFD(x64emu_t *emu, uintptr_t fcn) { iFD_t fn = (iFD_t)fcn; R_RAX=(uint32_t)fn(LD2localLD((void*)(R_RSP + 8))); }
+void DFD(x64emu_t *emu, uintptr_t fcn) { DFD_t fn = (DFD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = ld; }
+void DFY(x64emu_t *emu, uintptr_t fcn) { DFY_t fn = (DFY_t)fcn; long double ld=fn(to_complexl(emu, R_RSP + 8)); fpu_do_push(emu); ST0val = ld; }
+void lFD(x64emu_t *emu, uintptr_t fcn) { lFD_t fn = (lFD_t)fcn; R_RAX=(intptr_t)fn(LD2localLD((void*)(R_RSP + 8))); }
+void YFY(x64emu_t *emu, uintptr_t fcn) { YFY_t fn = (YFY_t)fcn; from_complexl(emu, fn(to_complexl(emu, R_RSP + 8))); }
+void IFED(x64emu_t *emu, uintptr_t fcn) { IFED_t fn = (IFED_t)fcn; S_RAX=(int64_t)fn(emu, LD2localLD((void*)(R_RSP + 8))); }
+void fFfD(x64emu_t *emu, uintptr_t fcn) { fFfD_t fn = (fFfD_t)fcn; emu->xmm[0].f[0]=fn(emu->xmm[0].f[0], LD2localLD((void*)(R_RSP + 8))); }
+void dFdD(x64emu_t *emu, uintptr_t fcn) { dFdD_t fn = (dFdD_t)fcn; emu->xmm[0].d[0]=fn(emu->xmm[0].d[0], LD2localLD((void*)(R_RSP + 8))); }
+void DFiD(x64emu_t *emu, uintptr_t fcn) { DFiD_t fn = (DFiD_t)fcn; long double ld=fn((int32_t)R_RDI, LD2localLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = ld; }
+void DFDi(x64emu_t *emu, uintptr_t fcn) { DFDi_t fn = (DFDi_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI); fpu_do_push(emu); ST0val = ld; }
+void DFDD(x64emu_t *emu, uintptr_t fcn) { DFDD_t fn = (DFDD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), LD2localLD((void*)(R_RSP + 24))); fpu_do_push(emu); ST0val = ld; }
+void DFDp(x64emu_t *emu, uintptr_t fcn) { DFDp_t fn = (DFDp_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), (void*)R_RDI); fpu_do_push(emu); ST0val = ld; }
+void DFpp(x64emu_t *emu, uintptr_t fcn) { DFpp_t fn = (DFpp_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI); fpu_do_push(emu); ST0val = ld; }
+void lFDD(x64emu_t *emu, uintptr_t fcn) { lFDD_t fn = (lFDD_t)fcn; R_RAX=(intptr_t)fn(LD2localLD((void*)(R_RSP + 8)), LD2localLD((void*)(R_RSP + 24))); }
+void YFYY(x64emu_t *emu, uintptr_t fcn) { YFYY_t fn = (YFYY_t)fcn; from_complexl(emu, fn(to_complexl(emu, R_RSP + 8), to_complexl(emu, R_RSP + 40))); }
+void vFDpp(x64emu_t *emu, uintptr_t fcn) { vFDpp_t fn = (vFDpp_t)fcn; fn(LD2localLD((void*)(R_RSP + 8)), (void*)R_RDI, (void*)R_RSI); }
+void DFDDD(x64emu_t *emu, uintptr_t fcn) { DFDDD_t fn = (DFDDD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8)), LD2localLD((void*)(R_RSP + 24)), LD2localLD((void*)(R_RSP + 40))); fpu_do_push(emu); ST0val = ld; }
+void DFppi(x64emu_t *emu, uintptr_t fcn) { DFppi_t fn = (DFppi_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX); fpu_do_push(emu); ST0val = ld; }
+void DFppp(x64emu_t *emu, uintptr_t fcn) { DFppp_t fn = (DFppp_t)fcn; long double ld=fn((void*)R_RDI, (void*)R_RSI, (void*)R_RDX); fpu_do_push(emu); ST0val = ld; }
+void pFDip(x64emu_t *emu, uintptr_t fcn) { pFDip_t fn = (pFDip_t)fcn; R_RAX=(uintptr_t)fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI); }
+void iFpLpD(x64emu_t *emu, uintptr_t fcn) { iFpLpD_t fn = (iFpLpD_t)fcn; R_RAX=(uint32_t)fn((void*)R_RDI, (uintptr_t)R_RSI, (void*)R_RDX, LD2localLD((void*)(R_RSP + 8))); }
+void pFDipp(x64emu_t *emu, uintptr_t fcn) { pFDipp_t fn = (pFDipp_t)fcn; R_RAX=(uintptr_t)fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX); }
+void iFDipppL(x64emu_t *emu, uintptr_t fcn) { iFDipppL_t fn = (iFDipppL_t)fcn; R_RAX=(uint32_t)fn(LD2localLD((void*)(R_RSP + 8)), (int32_t)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_RCX, (uintptr_t)R_R8); }
+void vFppippDpDc(x64emu_t *emu, uintptr_t fcn) { vFppippDpDc_t fn = (vFppippDpDc_t)fcn; fn((void*)R_RDI, (void*)R_RSI, (int32_t)R_RDX, (void*)R_RCX, (void*)R_R8, LD2localLD((void*)(R_RSP + 8)), (void*)R_R9, LD2localLD((void*)(R_RSP + 24)), *(int8_t*)(R_RSP + 40)); }
 #endif
 
 #if defined(NOALIGN)