7 files changed, 221 insertions, 173 deletions
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 0afd96018e..2439af63a0 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -899,6 +899,18 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 
     clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar.id_aa64mmfr0);
 
+    /*
+     * Disable SME, which is not properly handled by QEMU hvf yet.
+     * To allow this through we would need to:
+     * - make sure that the SME state is correctly handled in the
+     *   get_registers/put_registers functions
+     * - get the SME-specific CPU properties to work with accelerators
+     *   other than TCG
+     * - fix any assumptions we made that SME implies SVE (since
+     *   on the M4 there is SME but not SVE)
+     */
+    host_isar.id_aa64pfr1 &= ~R_ID_AA64PFR1_SME_MASK;
+
     ahcf->isar = host_isar;
 
     /*
@@ -1971,6 +1983,7 @@ int hvf_vcpu_exec(CPUState *cpu)
         bool isv = syndrome & ARM_EL_ISV;
         bool iswrite = (syndrome >> 6) & 1;
         bool s1ptw = (syndrome >> 7) & 1;
+        bool sse = (syndrome >> 21) & 1;
         uint32_t sas = (syndrome >> 22) & 3;
         uint32_t len = 1 << sas;
         uint32_t srt = (syndrome >> 16) & 0x1f;
@@ -1998,6 +2011,9 @@ int hvf_vcpu_exec(CPUState *cpu)
             address_space_read(&address_space_memory,
                                hvf_exit->exception.physical_address,
                                MEMTXATTRS_UNSPECIFIED, &val, len);
+            if (sse) {
+                val = sextract64(val, 0, len * 8);
+            }
             hvf_set_reg(cpu, srt, val);
         }
 
diff --git a/target/arm/internals.h b/target/arm/internals.h
index b318734145..a6ff228f9f 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1833,5 +1833,14 @@ int alle1_tlbmask(CPUARMState *env);
 void arm_set_default_fp_behaviours(float_status *s);
 /* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
 void arm_set_ah_fp_behaviours(float_status *s);
+/* Read the float_status info and return the appropriate FPSR value */
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env);
+/* Clear the exception status flags from all float_status fields */
+void vfp_clear_float_status_exc_flags(CPUARMState *env);
+/*
+ * Update float_status fields to handle the bits of the FPCR
+ * specified by mask changing to the values in val.
+ */
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask);
 
 #endif
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 2e10464dbb..3065081d24 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -4,7 +4,7 @@ arm_ss.add(files(
   'debug_helper.c',
   'gdbstub.c',
   'helper.c',
-  'vfp_helper.c',
+  'vfp_fpscr.c',
 ))
 arm_ss.add(zlib)
 
diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c
index f3f45d54f2..93a15cad61 100644
--- a/target/arm/tcg-stubs.c
+++ b/target/arm/tcg-stubs.c
@@ -30,3 +30,25 @@ void assert_hflags_rebuild_correctly(CPUARMState *env)
 void define_tlb_insn_regs(ARMCPU *cpu)
 {
 }
+
+/* With KVM, we never use float_status, so these can be no-ops */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+}
+
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+}
+
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+{
+    return 0;
+}
+
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
+{
+}
+
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+}
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 1f9077c372..dd12ccedb1 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -41,6 +41,7 @@ arm_ss.add(files(
   'vec_helper.c',
   'tlb-insns.c',
   'arith_helper.c',
+  'vfp_helper.c',
 ))
 
 arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
diff --git a/target/arm/vfp_helper.c b/target/arm/tcg/vfp_helper.c
index 5d424477a2..b32e2f4e27 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -23,13 +23,7 @@
 #include "internals.h"
 #include "cpu-features.h"
 #include "fpu/softfloat.h"
-#ifdef CONFIG_TCG
 #include "qemu/log.h"
-#endif
-
-/* VFP support.  We follow the convention used for VFP instructions:
-   Single precision routines have a "s" suffix, double precision a
-   "d" suffix.  */
 
 /*
  * Set the float_status behaviour to match the Arm defaults:
@@ -75,8 +69,6 @@ void arm_set_ah_fp_behaviours(float_status *s)
     set_float_default_nan_pattern(0b11000000, s);
 }
 
-#ifdef CONFIG_TCG
-
 /* Convert host exception flags to vfp form.  */
 static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
 {
@@ -113,7 +105,7 @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
     return target_bits;
 }
 
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
 {
     uint32_t a32_flags = 0, a64_flags = 0;
 
@@ -148,7 +140,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
         vfp_exceptbits_from_host(a32_flags, false);
 }
 
-static void vfp_clear_float_status_exc_flags(CPUARMState *env)
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
 {
     /*
      * Clear out all the exception-flag information in the float_status
@@ -176,7 +168,7 @@ static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
     vfp_clear_float_status_exc_flags(env);
 }
 
-static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
 {
     uint64_t changed = env->vfp.fpcr;
 
@@ -261,166 +253,11 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
     }
 }
 
-#else
-
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
-{
-    return 0;
-}
-
-static void vfp_clear_float_status_exc_flags(CPUARMState *env)
-{
-}
-
-static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
-{
-}
-
-#endif
-
-uint32_t vfp_get_fpcr(CPUARMState *env)
-{
-    uint32_t fpcr = env->vfp.fpcr
-        | (env->vfp.vec_len << 16)
-        | (env->vfp.vec_stride << 20);
-
-    /*
-     * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
-     * of the two is not applicable to this CPU will always be zero.
-     */
-    fpcr |= env->v7m.ltpsize << 16;
-
-    return fpcr;
-}
-
-uint32_t vfp_get_fpsr(CPUARMState *env)
-{
-    uint32_t fpsr = env->vfp.fpsr;
-    uint32_t i;
-
-    fpsr |= vfp_get_fpsr_from_host(env);
-
-    i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
-    fpsr |= i ? FPSR_QC : 0;
-    return fpsr;
-}
-
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
-{
-    return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
-        (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
-}
-
-uint32_t vfp_get_fpscr(CPUARMState *env)
-{
-    return HELPER(vfp_get_fpscr)(env);
-}
-
-void vfp_set_fpsr(CPUARMState *env, uint32_t val)
-{
-    ARMCPU *cpu = env_archcpu(env);
-
-    if (arm_feature(env, ARM_FEATURE_NEON) ||
-        cpu_isar_feature(aa32_mve, cpu)) {
-        /*
-         * The bit we set within vfp.qc[] is arbitrary; the array as a
-         * whole being zero/non-zero is what counts.
-         */
-        env->vfp.qc[0] = val & FPSR_QC;
-        env->vfp.qc[1] = 0;
-        env->vfp.qc[2] = 0;
-        env->vfp.qc[3] = 0;
-    }
-
-    /*
-     * NZCV lives only in env->vfp.fpsr. The cumulative exception flags
-     * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible
-     * extra pending exception information that hasn't yet been folded in
-     * living in the float_status values (for TCG).
-     * Since this FPSR write gives us the up to date values of the exception
-     * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing
-     * anything else. We also need to clear out the float_status exception
-     * information so that the next vfp_get_fpsr does not fold in stale data.
-     */
-    val &= FPSR_NZCV_MASK | FPSR_CEXC_MASK;
-    env->vfp.fpsr = val;
-    vfp_clear_float_status_exc_flags(env);
-}
-
-static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
-{
-    /*
-     * We only set FPCR bits defined by mask, and leave the others alone.
-     * We assume the mask is sensible (e.g. doesn't try to set only
-     * part of a field)
-     */
-    ARMCPU *cpu = env_archcpu(env);
-
-    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
-    if (!cpu_isar_feature(any_fp16, cpu)) {
-        val &= ~FPCR_FZ16;
-    }
-    if (!cpu_isar_feature(aa64_afp, cpu)) {
-        val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
-    }
-
-    if (!cpu_isar_feature(aa64_ebf16, cpu)) {
-        val &= ~FPCR_EBF;
-    }
-
-    vfp_set_fpcr_to_host(env, val, mask);
-
-    if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
-        if (!arm_feature(env, ARM_FEATURE_M)) {
-            /*
-             * Short-vector length and stride; on M-profile these bits
-             * are used for different purposes.
-             * We can't make this conditional be "if MVFR0.FPShVec != 0",
-             * because in v7A no-short-vector-support cores still had to
-             * allow Stride/Len to be written with the only effect that
-             * some insns are required to UNDEF if the guest sets them.
-             */
-            env->vfp.vec_len = extract32(val, 16, 3);
-            env->vfp.vec_stride = extract32(val, 20, 2);
-        } else if (cpu_isar_feature(aa32_mve, cpu)) {
-            env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
-                                         FPCR_LTPSIZE_LENGTH);
-        }
-    }
-
-    /*
-     * We don't implement trapped exception handling, so the
-     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
-     *
-     * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
-     * FIZ, AH, and NEP.
-     * Len, Stride and LTPSIZE we just handled. Store those bits
-     * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
-     * bits.
-     */
-    val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
-        FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
-    env->vfp.fpcr &= ~mask;
-    env->vfp.fpcr |= val;
-}
-
-void vfp_set_fpcr(CPUARMState *env, uint32_t val)
-{
-    vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
-}
-
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
-{
-    vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
-    vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
-}
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
-    HELPER(vfp_set_fpscr)(env, val);
-}
-
-#ifdef CONFIG_TCG
+/*
+ * VFP support.  We follow the convention used for VFP instructions:
+ * Single precision routines have a "s" suffix, double precision a
+ * "d" suffix.
+ */
 
 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
 
@@ -1520,4 +1357,12 @@ void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
     raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
 }
 
-#endif
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+    return vfp_get_fpscr(env);
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpscr(env, val);
+}
diff --git a/target/arm/vfp_fpscr.c b/target/arm/vfp_fpscr.c
new file mode 100644
index 0000000000..92ea60ebbf
--- /dev/null
+++ b/target/arm/vfp_fpscr.c
@@ -0,0 +1,155 @@
+/*
+ * ARM VFP floating-point: handling of FPSCR/FPCR/FPSR
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+
+uint32_t vfp_get_fpcr(CPUARMState *env)
+{
+    uint32_t fpcr = env->vfp.fpcr
+        | (env->vfp.vec_len << 16)
+        | (env->vfp.vec_stride << 20);
+
+    /*
+     * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
+     * of the two is not applicable to this CPU will always be zero.
+     */
+    fpcr |= env->v7m.ltpsize << 16;
+
+    return fpcr;
+}
+
+uint32_t vfp_get_fpsr(CPUARMState *env)
+{
+    uint32_t fpsr = env->vfp.fpsr;
+    uint32_t i;
+
+    fpsr |= vfp_get_fpsr_from_host(env);
+
+    i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
+    fpsr |= i ? FPSR_QC : 0;
+    return fpsr;
+}
+
+uint32_t vfp_get_fpscr(CPUARMState *env)
+{
+    return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
+        (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
+}
+
+void vfp_set_fpsr(CPUARMState *env, uint32_t val)
+{
+    ARMCPU *cpu = env_archcpu(env);
+
+    if (arm_feature(env, ARM_FEATURE_NEON) ||
+        cpu_isar_feature(aa32_mve, cpu)) {
+        /*
+         * The bit we set within vfp.qc[] is arbitrary; the array as a
+         * whole being zero/non-zero is what counts.
+         */
+        env->vfp.qc[0] = val & FPSR_QC;
+        env->vfp.qc[1] = 0;
+        env->vfp.qc[2] = 0;
+        env->vfp.qc[3] = 0;
+    }
+
+    /*
+     * NZCV lives only in env->vfp.fpsr. The cumulative exception flags
+     * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible
+     * extra pending exception information that hasn't yet been folded in
+     * living in the float_status values (for TCG).
+     * Since this FPSR write gives us the up to date values of the exception
+     * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing
+     * anything else. We also need to clear out the float_status exception
+     * information so that the next vfp_get_fpsr does not fold in stale data.
+     */
+    val &= FPSR_NZCV_MASK | FPSR_CEXC_MASK;
+    env->vfp.fpsr = val;
+    vfp_clear_float_status_exc_flags(env);
+}
+
+static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+    /*
+     * We only set FPCR bits defined by mask, and leave the others alone.
+     * We assume the mask is sensible (e.g. doesn't try to set only
+     * part of a field)
+     */
+    ARMCPU *cpu = env_archcpu(env);
+
+    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
+    if (!cpu_isar_feature(any_fp16, cpu)) {
+        val &= ~FPCR_FZ16;
+    }
+    if (!cpu_isar_feature(aa64_afp, cpu)) {
+        val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
+    }
+
+    if (!cpu_isar_feature(aa64_ebf16, cpu)) {
+        val &= ~FPCR_EBF;
+    }
+
+    vfp_set_fpcr_to_host(env, val, mask);
+
+    if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
+        if (!arm_feature(env, ARM_FEATURE_M)) {
+            /*
+             * Short-vector length and stride; on M-profile these bits
+             * are used for different purposes.
+             * We can't make this conditional be "if MVFR0.FPShVec != 0",
+             * because in v7A no-short-vector-support cores still had to
+             * allow Stride/Len to be written with the only effect that
+             * some insns are required to UNDEF if the guest sets them.
+             */
+            env->vfp.vec_len = extract32(val, 16, 3);
+            env->vfp.vec_stride = extract32(val, 20, 2);
+        } else if (cpu_isar_feature(aa32_mve, cpu)) {
+            env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
+                                         FPCR_LTPSIZE_LENGTH);
+        }
+    }
+
+    /*
+     * We don't implement trapped exception handling, so the
+     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
+     *
+     * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
+     * FIZ, AH, and NEP.
+     * Len, Stride and LTPSIZE we just handled. Store those bits
+     * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
+     * bits.
+     */
+    val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
+        FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
+    env->vfp.fpcr &= ~mask;
+    env->vfp.fpcr |= val;
+}
+
+void vfp_set_fpcr(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
+}
+
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
+    vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
+}