Added preliminary Box32 support (#1760)

* Improve the ReserveHigMemory helper function * [BOX32] Added some wrapping infrastructure * [BOX32] More wrapped 32bits lib infrastructure * [BOX32] Added callback and tls 32bits handling * [BOX32] Added more 32bits, around wrappers and elfs * [BOX32] Added the 32bits version of myalign * [BOX32] More wrapped libs and 32bits fixes and imrpovments * [BOX32] Added some 32bits tests * [BOX32] Try to enable some Box32 build and test on the CI * [BOX32] Disable Box32 testing on CI platform that use qemu * [BOX32] Another attempt to disable Box32 testing on CI platform that use qemu * [BOX32] Small fix for another attempt to disable Box32 testing on CI platform that use qemu * [BOX32] Yet another fix for another attempt to disable Box32 testing on CI platform that use qemu * [BOX32] Fixed a typo in CI script * [BOX32] Better scratch alighnment and enabled more tests * [BOX32] Added (partial) wrapped 32bits librt * [BOX32] Added mention of Box32 in README * [BOX32] Added phtread handling, and numerous fixes to 32bits handling. [ARM64_DYNAREC] Fixed access to segment with negative offset * [BOX32] Added system libs and cpp testing, plus some more fixes * [BOX32] Fix previous commit * [BOX32] Better stack adjustment for 32bits processes * [BOX32] Added getenv wrapped 32bits function and friends * [BOX32] Don't look for box86 for a Box32 build * [BOX32] Don't do 32bits cppThreads test for now on CI * [BOX32] Enabled a few more 32bits tests * [BOX32] For ld_lib_path for both CppThreads tests * [BOX32] [ANDROID] Some Fixes for Android Build * [BOX32] Still need to disable cppThread_32bits test on CI for some reason * [BOX32] [ANDROID] Don't show PreInit Array Warning (#1751) * [BOX32] [ANDROID] One More Fix for Android Build That I forgotten to … (#1752) * [BOX32] [ANDROID] One More Fix for Android Build That I forgotten to push before * [BOX32] [ANDROID] Try to Create __libc_init * [BOX32] [ANDROID] Try to disable NEEDED_LIBS for now (libdl is not wrapped) * [BOX32] Updated generated files * [BOX32] Added 32bits context functions * [BOX32] Added 32bits signal handling * [BOX32] Added some missing 32bits elfloader functions * [BOX32] Fix build on x86_64 machine * [BOX32] Better fix for x86_64 build * [BOX32] Actually added missing libs, and re-enabled cppThreads_32bits test * [BOX32] Added wrapped 32bits libdl * [BOX32] Try to re-enabled Box32 test on CI for ARM64 builds * [BOX32] fine-tuning Box32 test on CI for ARM64 builds * [BOX32] More fine-tuning to Box32 test on CI for ARM64 builds * [BOX32] Enabled Box32 test on CI for LA64 and RV64 builds too * [BOX32] re-Disabled Box32 test on CI for LA64 and RV64 builds, not working for now * [BOX32] Temporarily disabled cppThreads_32bits test on CI --------- Co-authored-by: KreitinnSoftware <pablopro5051@gmail.com> Co-authored-by: KreitinnSoftware <80591934+KreitinnSoftware@users.noreply.github.com>
author: ptitSeb <sebastien.chev@gmail.com> 2024-08-26 17:45:13 +0200
committer: GitHub <noreply@github.com> 2024-08-26 17:45:13 +0200
commit: b5105a1e57bba3305d5dce93ab4d2f7faab6b34a (patch)
tree: ab26b700d3c48f2c8e32a1084ae7c2e7a8448b06 /tests32/benchfloat.c
parent: 9beb745765e9c99bad6410094a97bf0bf9ebc1eb (diff)
download: box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.tar.gz
box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.zip
1 files changed, 888 insertions, 0 deletions
diff --git a/tests32/benchfloat.c b/tests32/benchfloat.c
new file mode 100755
index 00000000..93b8207a
--- /dev/null
+++ b/tests32/benchfloat.c
@@ -0,0 +1,888 @@
+/*
+**
+** LINPACK.C        Linpack benchmark, calculates FLOPS.
+**                  (FLoating Point Operations Per Second)
+**
+** Translated to C by Bonnie Toy 5/88
+**
+** Modified by Will Menninger, 10/93, with these features:
+**  (modified on 2/25/94  to fix a problem with daxpy  for
+**   unequal increments or equal increments not equal to 1.
+**     Jack Dongarra)
+**
+** - Defaults to double precision.
+** - Averages ROLLed and UNROLLed performance.
+** - User selectable array sizes.
+** - Automatically does enough repetitions to take at least 10 CPU seconds.
+** - Prints machine precision.
+** - ANSI prototyping.
+**
+** To compile:  cc -O -o linpack linpack.c -lm
+**
+**
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include <float.h>
+
+#define DP
+
+#ifdef SP
+#define ZERO        0.0
+#define ONE         1.0
+#define PREC        "Single"
+#define BASE10DIG   FLT_DIG
+
+typedef float   REAL;
+#endif
+
+#ifdef DP
+#define ZERO        0.0e0
+#define ONE         1.0e0
+#define PREC        "Double"
+#define BASE10DIG   DBL_DIG
+
+typedef double  REAL;
+#endif
+
+static REAL linpack  (long nreps,int arsize);
+static void matgen   (REAL *a,int lda,int n,REAL *b,REAL *norma);
+static void dgefa    (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
+static void dgesl    (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
+static void daxpy_r  (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
+static REAL ddot_r   (int n,REAL *dx,int incx,REAL *dy,int incy);
+static void dscal_r  (int n,REAL da,REAL *dx,int incx);
+static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
+static REAL ddot_ur  (int n,REAL *dx,int incx,REAL *dy,int incy);
+static void dscal_ur (int n,REAL da,REAL *dx,int incx);
+static int  idamax   (int n,REAL *dx,int incx);
+static REAL second   (void);
+
+static void *mempool;
+
+
+void main(int argc, const char** argv)
+
+    {
+    char    buf[80];
+    int     arsize;
+    long    arsize2d,memreq,nreps;
+    size_t  malloc_arg;
+
+    if(argc>1)
+        strcpy(buf, argv[1]);
+
+    while (1)
+        {
+        /*printf("Enter array size (q to quit) [200]:  ");
+        fgets(buf,79,stdin);*/
+        
+        if (buf[0]=='q' || buf[0]=='Q')
+            break;
+        if (buf[0]=='\0' || buf[0]=='\n')
+            arsize=200;
+        else
+            arsize=atoi(buf);
+        arsize/=2;
+        arsize*=2;
+        if (arsize<10)
+            {
+            printf("Too small.\n");
+            continue;
+            }
+        arsize2d = (long)arsize*(long)arsize;
+        memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
+        printf("Memory required:  %ldK.\n",(memreq+512L)>>10);
+        malloc_arg=(size_t)memreq;
+        if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
+            {
+            printf("Not enough memory available for given array size.\n\n");
+            continue;
+            }
+        printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
+        printf("Machine precision:  %d digits.\n",BASE10DIG);
+        printf("Array size %d X %d.\n",arsize,arsize);
+        printf("Average rolled and unrolled performance:\n\n");
+        printf("    Reps Time(s) DGEFA   DGESL  OVERHEAD    KFLOPS\n");
+        printf("----------------------------------------------------\n");
+        nreps=1;
+        while (linpack(nreps,arsize)<10.)
+            nreps*=2;
+        free(mempool);
+        printf("\n");
+        strcpy(buf, "q");
+        }
+    }
+
+
+static REAL linpack(long nreps,int arsize)
+
+    {
+    REAL  *a,*b;
+    REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
+    int   *ipvt,n,info,lda;
+    long   i,arsize2d;
+
+    lda = arsize;
+    n = arsize/2;
+    arsize2d = (long)arsize*(long)arsize;
+    ops=((2.0*n*n*n)/3.0+2.0*n*n);
+    a=(REAL *)mempool;
+    b=a+arsize2d;
+    ipvt=(int *)&b[arsize];
+    tdgesl=0;
+    tdgefa=0;
+    totalt=second();
+    for (i=0;i<nreps;i++)
+        {
+        matgen(a,lda,n,b,&norma);
+        t1 = second();
+        dgefa(a,lda,n,ipvt,&info,1);
+        tdgefa += second()-t1;
+        t1 = second();
+        dgesl(a,lda,n,ipvt,b,0,1);
+        tdgesl += second()-t1;
+        }
+    for (i=0;i<nreps;i++)
+        {
+        matgen(a,lda,n,b,&norma);
+        t1 = second();
+        dgefa(a,lda,n,ipvt,&info,0);
+        tdgefa += second()-t1;
+        t1 = second();
+        dgesl(a,lda,n,ipvt,b,0,0);
+        tdgesl += second()-t1;
+        }
+    totalt=second()-totalt;
+    if (totalt<0.5 || tdgefa+tdgesl<0.2)
+        return(0.);
+    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
+    toverhead=totalt-tdgefa-tdgesl;
+    if (tdgefa<0.)
+        tdgefa=0.;
+    if (tdgesl<0.)
+        tdgesl=0.;
+    if (toverhead<0.)
+        toverhead=0.;
+    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n",
+            nreps,totalt,100.*tdgefa/totalt,
+            100.*tdgesl/totalt,100.*toverhead/totalt,
+            kflops);
+    return(totalt);
+    }
+
+
+/*
+** For matgen,
+** We would like to declare a[][lda], but c does not allow it.  In this
+** function, references to a[i][j] are written a[lda*i+j].
+*/
+static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
+
+    {
+    int init,i,j;
+
+    init = 1325;
+    *norma = 0.0;
+    for (j = 0; j < n; j++)
+        for (i = 0; i < n; i++)
+            {
+            init = (int)((long)3125*(long)init % 65536L);
+            a[lda*j+i] = (init - 32768.0)/16384.0;
+            *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
+            }
+    for (i = 0; i < n; i++)
+        b[i] = 0.0;
+    for (j = 0; j < n; j++)
+        for (i = 0; i < n; i++)
+            b[i] = b[i] + a[lda*j+i];
+    }
+
+
+/*
+**
+** DGEFA benchmark
+**
+** We would like to declare a[][lda], but c does not allow it.  In this
+** function, references to a[i][j] are written a[lda*i+j].
+**
+**   dgefa factors a double precision matrix by gaussian elimination.
+**
+**   dgefa is usually called by dgeco, but it can be called
+**   directly with a saving in time if  rcond  is not needed.
+**   (time for dgeco) = (1 + 9/n)*(time for dgefa) .
+**
+**   on entry
+**
+**      a       REAL precision[n][lda]
+**              the matrix to be factored.
+**
+**      lda     integer
+**              the leading dimension of the array  a .
+**
+**      n       integer
+**              the order of the matrix  a .
+**
+**   on return
+**
+**      a       an upper triangular matrix and the multipliers
+**              which were used to obtain it.
+**              the factorization can be written  a = l*u  where
+**              l  is a product of permutation and unit lower
+**              triangular matrices and  u  is upper triangular.
+**
+**      ipvt    integer[n]
+**              an integer vector of pivot indices.
+**
+**      info    integer
+**              = 0  normal value.
+**              = k  if  u[k][k] .eq. 0.0 .  this is not an error
+**                   condition for this subroutine, but it does
+**                   indicate that dgesl or dgedi will divide by zero
+**                   if called.  use  rcond  in dgeco for a reliable
+**                   indication of singularity.
+**
+**   linpack. this version dated 08/14/78 .
+**   cleve moler, university of New Mexico, argonne national lab.
+**
+**   functions
+**
+**   blas daxpy,dscal,idamax
+**
+*/
+static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
+
+    {
+    REAL t;
+    int idamax(),j,k,kp1,l,nm1;
+
+    /* gaussian elimination with partial pivoting */
+
+    if (roll)
+        {
+        *info = 0;
+        nm1 = n - 1;
+        if (nm1 >=  0)
+            for (k = 0; k < nm1; k++)
+                {
+                kp1 = k + 1;
+
+                /* find l = pivot index */
+
+                l = idamax(n-k,&a[lda*k+k],1) + k;
+                ipvt[k] = l;
+
+                /* zero pivot implies this column already
+                   triangularized */
+
+                if (a[lda*k+l] != ZERO)
+                    {
+
+                    /* interchange if necessary */
+
+                    if (l != k)
+                        {
+                        t = a[lda*k+l];
+                        a[lda*k+l] = a[lda*k+k];
+                        a[lda*k+k] = t;
+                        }
+
+                    /* compute multipliers */
+
+                    t = -ONE/a[lda*k+k];
+                    dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
+
+                    /* row elimination with column indexing */
+
+                    for (j = kp1; j < n; j++)
+                        {
+                        t = a[lda*j+l];
+                        if (l != k)
+                            {
+                            a[lda*j+l] = a[lda*j+k];
+                            a[lda*j+k] = t;
+                            }
+                        daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
+                        }
+                    }
+                else
+                    (*info) = k;
+                }
+        ipvt[n-1] = n-1;
+        if (a[lda*(n-1)+(n-1)] == ZERO)
+            (*info) = n-1;
+        }
+    else
+        {
+        *info = 0;
+        nm1 = n - 1;
+        if (nm1 >=  0)
+            for (k = 0; k < nm1; k++)
+                {
+                kp1 = k + 1;
+
+                /* find l = pivot index */
+
+                l = idamax(n-k,&a[lda*k+k],1) + k;
+                ipvt[k] = l;
+
+                /* zero pivot implies this column already
+                   triangularized */
+
+                if (a[lda*k+l] != ZERO)
+                    {
+
+                    /* interchange if necessary */
+
+                    if (l != k)
+                        {
+                        t = a[lda*k+l];
+                        a[lda*k+l] = a[lda*k+k];
+                        a[lda*k+k] = t;
+                        }
+
+                    /* compute multipliers */
+
+                    t = -ONE/a[lda*k+k];
+                    dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
+
+                    /* row elimination with column indexing */
+
+                    for (j = kp1; j < n; j++)
+                        {
+                        t = a[lda*j+l];
+                        if (l != k)
+                            {
+                            a[lda*j+l] = a[lda*j+k];
+                            a[lda*j+k] = t;
+                            }
+                        daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
+                        }
+                    }
+                else
+                    (*info) = k;
+                }
+        ipvt[n-1] = n-1;
+        if (a[lda*(n-1)+(n-1)] == ZERO)
+            (*info) = n-1;
+        }
+    }
+
+
+/*
+**
+** DGESL benchmark
+**
+** We would like to declare a[][lda], but c does not allow it.  In this
+** function, references to a[i][j] are written a[lda*i+j].
+**
+**   dgesl solves the double precision system
+**   a * x = b  or  trans(a) * x = b
+**   using the factors computed by dgeco or dgefa.
+**
+**   on entry
+**
+**      a       double precision[n][lda]
+**              the output from dgeco or dgefa.
+**
+**      lda     integer
+**              the leading dimension of the array  a .
+**
+**      n       integer
+**              the order of the matrix  a .
+**
+**      ipvt    integer[n]
+**              the pivot vector from dgeco or dgefa.
+**
+**      b       double precision[n]
+**              the right hand side vector.
+**
+**      job     integer
+**              = 0         to solve  a*x = b ,
+**              = nonzero   to solve  trans(a)*x = b  where
+**                          trans(a)  is the transpose.
+**
+**  on return
+**
+**      b       the solution vector  x .
+**
+**   error condition
+**
+**      a division by zero will occur if the input factor contains a
+**      zero on the diagonal.  technically this indicates singularity
+**      but it is often caused by improper arguments or improper
+**      setting of lda .  it will not occur if the subroutines are
+**      called correctly and if dgeco has set rcond .gt. 0.0
+**      or dgefa has set info .eq. 0 .
+**
+**   to compute  inverse(a) * c  where  c  is a matrix
+**   with  p  columns
+**         dgeco(a,lda,n,ipvt,rcond,z)
+**         if (!rcond is too small){
+**              for (j=0,j<p,j++)
+**                      dgesl(a,lda,n,ipvt,c[j][0],0);
+**         }
+**
+**   linpack. this version dated 08/14/78 .
+**   cleve moler, university of new mexico, argonne national lab.
+**
+**   functions
+**
+**   blas daxpy,ddot
+*/
+static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
+
+    {
+    REAL    t;
+    int     k,kb,l,nm1;
+
+    if (roll)
+        {
+        nm1 = n - 1;
+        if (job == 0)
+            {
+
+            /* job = 0 , solve  a * x = b   */
+            /* first solve  l*y = b         */
+
+            if (nm1 >= 1)
+                for (k = 0; k < nm1; k++)
+                    {
+                    l = ipvt[k];
+                    t = b[l];
+                    if (l != k)
+                        {
+                        b[l] = b[k];
+                        b[k] = t;
+                        }
+                    daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
+                    }
+
+            /* now solve  u*x = y */
+
+            for (kb = 0; kb < n; kb++)
+                {
+                k = n - (kb + 1);
+                b[k] = b[k]/a[lda*k+k];
+                t = -b[k];
+                daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
+                }
+            }
+        else
+            {
+
+            /* job = nonzero, solve  trans(a) * x = b  */
+            /* first solve  trans(u)*y = b             */
+
+            for (k = 0; k < n; k++)
+                {
+                t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
+                b[k] = (b[k] - t)/a[lda*k+k];
+                }
+
+            /* now solve trans(l)*x = y     */
+
+            if (nm1 >= 1)
+                for (kb = 1; kb < nm1; kb++)
+                    {
+                    k = n - (kb+1);
+                    b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
+                    l = ipvt[k];
+                    if (l != k)
+                        {
+                        t = b[l];
+                        b[l] = b[k];
+                        b[k] = t;
+                        }
+                    }
+            }
+        }
+    else
+        {
+        nm1 = n - 1;
+        if (job == 0)
+            {
+
+            /* job = 0 , solve  a * x = b   */
+            /* first solve  l*y = b         */
+
+            if (nm1 >= 1)
+                for (k = 0; k < nm1; k++)
+                    {
+                    l = ipvt[k];
+                    t = b[l];
+                    if (l != k)
+                        {
+                        b[l] = b[k];
+                        b[k] = t;
+                        }
+                    daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
+                    }
+
+            /* now solve  u*x = y */
+
+            for (kb = 0; kb < n; kb++)
+                {
+                k = n - (kb + 1);
+                b[k] = b[k]/a[lda*k+k];
+                t = -b[k];
+                daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
+                }
+            }
+        else
+            {
+
+            /* job = nonzero, solve  trans(a) * x = b  */
+            /* first solve  trans(u)*y = b             */
+
+            for (k = 0; k < n; k++)
+                {
+                t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
+                b[k] = (b[k] - t)/a[lda*k+k];
+                }
+
+            /* now solve trans(l)*x = y     */
+
+            if (nm1 >= 1)
+                for (kb = 1; kb < nm1; kb++)
+                    {
+                    k = n - (kb+1);
+                    b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
+                    l = ipvt[k];
+                    if (l != k)
+                        {
+                        t = b[l];
+                        b[l] = b[k];
+                        b[k] = t;
+                        }
+                    }
+            }
+        }
+    }
+
+
+
+/*
+** Constant times a vector plus a vector.
+** Jack Dongarra, linpack, 3/11/78.
+** ROLLED version
+*/
+static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    int i,ix,iy;
+
+    if (n <= 0)
+        return;
+    if (da == ZERO)
+        return;
+
+    if (incx != 1 || incy != 1)
+        {
+
+        /* code for unequal increments or equal increments != 1 */
+
+        ix = 1;
+        iy = 1;
+        if(incx < 0) ix = (-n+1)*incx + 1;
+        if(incy < 0)iy = (-n+1)*incy + 1;
+        for (i = 0;i < n; i++)
+            {
+            dy[iy] = dy[iy] + da*dx[ix];
+            ix = ix + incx;
+            iy = iy + incy;
+            }
+        return;
+        }
+
+    /* code for both increments equal to 1 */
+
+    for (i = 0;i < n; i++)
+        dy[i] = dy[i] + da*dx[i];
+    }
+
+
+/*
+** Forms the dot product of two vectors.
+** Jack Dongarra, linpack, 3/11/78.
+** ROLLED version
+*/
+static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    REAL dtemp;
+    int i,ix,iy;
+
+    dtemp = ZERO;
+
+    if (n <= 0)
+        return(ZERO);
+
+    if (incx != 1 || incy != 1)
+        {
+
+        /* code for unequal increments or equal increments != 1 */
+
+        ix = 0;
+        iy = 0;
+        if (incx < 0) ix = (-n+1)*incx;
+        if (incy < 0) iy = (-n+1)*incy;
+        for (i = 0;i < n; i++)
+            {
+            dtemp = dtemp + dx[ix]*dy[iy];
+            ix = ix + incx;
+            iy = iy + incy;
+            }
+        return(dtemp);
+        }
+
+    /* code for both increments equal to 1 */
+
+    for (i=0;i < n; i++)
+        dtemp = dtemp + dx[i]*dy[i];
+    return(dtemp);
+    }
+
+
+/*
+** Scales a vector by a constant.
+** Jack Dongarra, linpack, 3/11/78.
+** ROLLED version
+*/
+static void dscal_r(int n,REAL da,REAL *dx,int incx)
+
+    {
+    int i,nincx;
+
+    if (n <= 0)
+        return;
+    if (incx != 1)
+        {
+
+        /* code for increment not equal to 1 */
+
+        nincx = n*incx;
+        for (i = 0; i < nincx; i = i + incx)
+            dx[i] = da*dx[i];
+        return;
+        }
+
+    /* code for increment equal to 1 */
+
+    for (i = 0; i < n; i++)
+        dx[i] = da*dx[i];
+    }
+
+
+/*
+** constant times a vector plus a vector.
+** Jack Dongarra, linpack, 3/11/78.
+** UNROLLED version
+*/
+static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    int i,ix,iy,m;
+
+    if (n <= 0)
+        return;
+    if (da == ZERO)
+        return;
+
+    if (incx != 1 || incy != 1)
+        {
+
+        /* code for unequal increments or equal increments != 1 */
+
+        ix = 1;
+        iy = 1;
+        if(incx < 0) ix = (-n+1)*incx + 1;
+        if(incy < 0)iy = (-n+1)*incy + 1;
+        for (i = 0;i < n; i++)
+            {
+            dy[iy] = dy[iy] + da*dx[ix];
+            ix = ix + incx;
+            iy = iy + incy;
+            }
+        return;
+        }
+
+    /* code for both increments equal to 1 */
+
+    m = n % 4;
+    if ( m != 0)
+        {
+        for (i = 0; i < m; i++)
+            dy[i] = dy[i] + da*dx[i];
+        if (n < 4)
+            return;
+        }
+    for (i = m; i < n; i = i + 4)
+        {
+        dy[i] = dy[i] + da*dx[i];
+        dy[i+1] = dy[i+1] + da*dx[i+1];
+        dy[i+2] = dy[i+2] + da*dx[i+2];
+        dy[i+3] = dy[i+3] + da*dx[i+3];
+        }
+    }
+
+
+/*
+** Forms the dot product of two vectors.
+** Jack Dongarra, linpack, 3/11/78.
+** UNROLLED version
+*/
+static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
+
+    {
+    REAL dtemp;
+    int i,ix,iy,m;
+
+    dtemp = ZERO;
+
+    if (n <= 0)
+        return(ZERO);
+
+    if (incx != 1 || incy != 1)
+        {
+
+        /* code for unequal increments or equal increments != 1 */
+
+        ix = 0;
+        iy = 0;
+        if (incx < 0) ix = (-n+1)*incx;
+        if (incy < 0) iy = (-n+1)*incy;
+        for (i = 0;i < n; i++)
+            {
+            dtemp = dtemp + dx[ix]*dy[iy];
+            ix = ix + incx;
+            iy = iy + incy;
+            }
+        return(dtemp);
+        }
+
+    /* code for both increments equal to 1 */
+
+    m = n % 5;
+    if (m != 0)
+        {
+        for (i = 0; i < m; i++)
+            dtemp = dtemp + dx[i]*dy[i];
+        if (n < 5)
+            return(dtemp);
+        }
+    for (i = m; i < n; i = i + 5)
+        {
+        dtemp = dtemp + dx[i]*dy[i] +
+        dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
+        dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
+        }
+    return(dtemp);
+    }
+
+
+/*
+** Scales a vector by a constant.
+** Jack Dongarra, linpack, 3/11/78.
+** UNROLLED version
+*/
+static void dscal_ur(int n,REAL da,REAL *dx,int incx)
+
+    {
+    int i,m,nincx;
+
+    if (n <= 0)
+        return;
+    if (incx != 1)
+        {
+
+        /* code for increment not equal to 1 */
+
+        nincx = n*incx;
+        for (i = 0; i < nincx; i = i + incx)
+            dx[i] = da*dx[i];
+        return;
+        }
+
+    /* code for increment equal to 1 */
+
+    m = n % 5;
+    if (m != 0)
+        {
+        for (i = 0; i < m; i++)
+            dx[i] = da*dx[i];
+        if (n < 5)
+            return;
+        }
+    for (i = m; i < n; i = i + 5)
+        {
+        dx[i] = da*dx[i];
+        dx[i+1] = da*dx[i+1];
+        dx[i+2] = da*dx[i+2];
+        dx[i+3] = da*dx[i+3];
+        dx[i+4] = da*dx[i+4];
+        }
+    }
+
+
+/*
+** Finds the index of element having max. absolute value.
+** Jack Dongarra, linpack, 3/11/78.
+*/
+static int idamax(int n,REAL *dx,int incx)
+
+    {
+    REAL dmax;
+    int i, ix, itemp;
+
+    if (n < 1)
+        return(-1);
+    if (n ==1 )
+        return(0);
+    if(incx != 1)
+        {
+
+        /* code for increment not equal to 1 */
+
+        ix = 1;
+        dmax = fabs((double)dx[0]);
+        ix = ix + incx;
+        for (i = 1; i < n; i++)
+            {
+            if(fabs((double)dx[ix]) > dmax)
+                {
+                itemp = i;
+                dmax = fabs((double)dx[ix]);
+                }
+            ix = ix + incx;
+            }
+        }
+    else
+        {
+
+        /* code for increment equal to 1 */
+
+        itemp = 0;
+        dmax = fabs((double)dx[0]);
+        for (i = 1; i < n; i++)
+            if(fabs((double)dx[i]) > dmax)
+                {
+                itemp = i;
+                dmax = fabs((double)dx[i]);
+                }
+        }
+    return (itemp);
+    }
+
+
+static REAL second(void)
+
+    {
+    return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
+    }
+
+
author	ptitSeb <sebastien.chev@gmail.com>	2024-08-26 17:45:13 +0200
committer	GitHub <noreply@github.com>	2024-08-26 17:45:13 +0200
commit	b5105a1e57bba3305d5dce93ab4d2f7faab6b34a (patch)
tree	ab26b700d3c48f2c8e32a1084ae7c2e7a8448b06 /tests32/benchfloat.c
parent	9beb745765e9c99bad6410094a97bf0bf9ebc1eb (diff)
download	box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.tar.gz box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.zip