summary refs log tree commit diff stats
path: root/util/host-utils.c
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2021-10-29 08:39:44 -0700
committerRichard Henderson <richard.henderson@linaro.org>2021-10-29 08:39:44 -0700
commita92cecba2791cd408d2bca04ce181dc2abaf9695 (patch)
tree1add13b9478f342d6d77015dc78595a4bc84bc76 /util/host-utils.c
parentedf044c558acfcf9ccd45ebacb84542dcca2e813 (diff)
parentefd629fb21e2ff6a8f62642d9ed7a23dfee4d320 (diff)
downloadfocaccia-qemu-a92cecba2791cd408d2bca04ce181dc2abaf9695.tar.gz
focaccia-qemu-a92cecba2791cd408d2bca04ce181dc2abaf9695.zip
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20211028' into staging
Improvements to qemu/int128
Fixes for 128/64 division.
Cleanup tcg/optimize.c
Optimize redundant sign extensions

# gpg: Signature made Thu 28 Oct 2021 09:06:00 PM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* remotes/rth/tags/pull-tcg-20211028: (60 commits)
  softmmu: fix for "after access" watchpoints
  softmmu: remove useless condition in watchpoint check
  softmmu: fix watchpoint processing in icount mode
  tcg/optimize: Propagate sign info for shifting
  tcg/optimize: Propagate sign info for bit counting
  tcg/optimize: Propagate sign info for setcond
  tcg/optimize: Propagate sign info for logical operations
  tcg/optimize: Optimize sign extensions
  tcg/optimize: Use fold_xx_to_i for rem
  tcg/optimize: Use fold_xi_to_x for div
  tcg/optimize: Use fold_xi_to_x for mul
  tcg/optimize: Use fold_xx_to_i for orc
  tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
  tcg: Extend call args using the correct opcodes
  tcg/optimize: Sink commutative operand swapping into fold functions
  tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
  tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
  tcg/optimize: Split out fold_masks
  tcg/optimize: Split out fold_ix_to_i
  tcg/optimize: Split out fold_xi_to_x
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'util/host-utils.c')
-rw-r--r--util/host-utils.c137
1 files changed, 89 insertions, 48 deletions
diff --git a/util/host-utils.c b/util/host-utils.c
index a789a11b46..bcc772b8ec 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -86,78 +86,119 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
     *phigh = rh;
 }
 
-/* Unsigned 128x64 division.  Returns 1 if overflow (divide by zero or */
-/* quotient exceeds 64 bits).  Otherwise returns quotient via plow and */
-/* remainder via phigh. */
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+/*
+ * Unsigned 128-by-64 division.
+ * Returns the remainder.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
 {
     uint64_t dhi = *phigh;
     uint64_t dlo = *plow;
-    unsigned i;
-    uint64_t carry = 0;
+    uint64_t rem, dhighest;
+    int sh;
 
-    if (divisor == 0) {
-        return 1;
-    } else if (dhi == 0) {
+    if (divisor == 0 || dhi == 0) {
         *plow  = dlo / divisor;
-        *phigh = dlo % divisor;
-        return 0;
-    } else if (dhi >= divisor) {
-        return 1;
+        *phigh = 0;
+        return dlo % divisor;
     } else {
+        sh = clz64(divisor);
 
-        for (i = 0; i < 64; i++) {
-            carry = dhi >> 63;
-            dhi = (dhi << 1) | (dlo >> 63);
-            if (carry || (dhi >= divisor)) {
-                dhi -= divisor;
-                carry = 1;
+        if (dhi < divisor) {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor <<= sh;
+                dhi = (dhi << sh) | (dlo >> (64 - sh));
+                dlo <<= sh;
+            }
+
+            *phigh = 0;
+            *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
+        } else {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor <<= sh;
+                dhighest = dhi >> (64 - sh);
+                dhi = (dhi << sh) | (dlo >> (64 - sh));
+                dlo <<= sh;
+
+                *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
             } else {
-                carry = 0;
+                /**
+                 * dhi >= divisor
+                 * Since the MSB of divisor is set (sh == 0),
+                 * (dhi - divisor) < divisor
+                 *
+                 * Thus, the high part of the quotient is 1, and we can
+                 * calculate the low part with a single call to udiv_qrnnd
+                 * after subtracting divisor from dhi
+                 */
+                dhi -= divisor;
+                *phigh = 1;
             }
-            dlo = (dlo << 1) | carry;
+
+            *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
         }
 
-        *plow = dlo;
-        *phigh = dhi;
-        return 0;
+        /*
+         * since the dividend/divisor might have been normalized,
+         * the remainder might also have to be shifted back
+         */
+        return rem >> sh;
     }
 }
 
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+/*
+ * Signed 128-by-64 division.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
 {
-    int sgn_dvdnd = *phigh < 0;
-    int sgn_divsr = divisor < 0;
-    int overflow = 0;
-
-    if (sgn_dvdnd) {
-        *plow = ~(*plow);
-        *phigh = ~(*phigh);
-        if (*plow == (int64_t)-1) {
-            *plow = 0;
-            (*phigh)++;
-         } else {
-            (*plow)++;
-         }
-    }
+    bool neg_quotient = false, neg_remainder = false;
+    uint64_t unsig_hi = *phigh, unsig_lo = *plow;
+    uint64_t rem;
 
-    if (sgn_divsr) {
-        divisor = 0 - divisor;
+    if (*phigh < 0) {
+        neg_quotient = !neg_quotient;
+        neg_remainder = !neg_remainder;
+
+        if (unsig_lo == 0) {
+            unsig_hi = -unsig_hi;
+        } else {
+            unsig_hi = ~unsig_hi;
+            unsig_lo = -unsig_lo;
+        }
     }
 
-    overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+    if (divisor < 0) {
+        neg_quotient = !neg_quotient;
 
-    if (sgn_dvdnd  ^ sgn_divsr) {
-        *plow = 0 - *plow;
+        divisor = -divisor;
     }
 
-    if (!overflow) {
-        if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
-            overflow = 1;
+    rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
+
+    if (neg_quotient) {
+        if (unsig_lo == 0) {
+            *phigh = -unsig_hi;
+            *plow = 0;
+        } else {
+            *phigh = ~unsig_hi;
+            *plow = -unsig_lo;
         }
+    } else {
+        *phigh = unsig_hi;
+        *plow = unsig_lo;
     }
 
-    return overflow;
+    if (neg_remainder) {
+        return -rem;
+    } else {
+        return rem;
+    }
 }
 #endif