summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--docs/vfio-ap.txt61
-rw-r--r--hw/s390x/ap-bridge.c12
-rw-r--r--hw/vfio/ap.c2
-rw-r--r--target/s390x/cc_helper.c8
-rw-r--r--target/s390x/excp_helper.c46
-rw-r--r--target/s390x/fpu_helper.c541
-rw-r--r--target/s390x/gen-features.c5
-rw-r--r--target/s390x/helper.c40
-rw-r--r--target/s390x/helper.h9
-rw-r--r--target/s390x/insn-data.def16
-rw-r--r--target/s390x/insn-format.def2
-rw-r--r--target/s390x/internal.h14
-rw-r--r--target/s390x/translate.c409
13 files changed, 792 insertions, 373 deletions
diff --git a/docs/vfio-ap.txt b/docs/vfio-ap.txt
index 12339684cd..8cd060a01e 100644
--- a/docs/vfio-ap.txt
+++ b/docs/vfio-ap.txt
@@ -440,8 +440,7 @@ unassign_control_domain
    'unassign_domain' file. This may be done multiple times to unassign more than
    one control domain.
 
-Notes: Hot plug/unplug is not currently supported for mediated AP matrix
-devices, so no changes to the AP matrix will be allowed while a guest using
+Notes: No changes to the AP matrix will be allowed while a guest using
 the mediated matrix device is running. Attempts to assign an adapter,
 domain or control domain will be rejected and an error (EBUSY) returned.
 
@@ -562,6 +561,54 @@ facilities:
                      for guest usage, no AP devices can be made accessible to a
                      guest started without APFT installed.
 
+Hot plug a vfio-ap device into a running guest:
+==============================================
+Only one vfio-ap device can be attached to the virtual machine's ap-bus, so a
+vfio-ap device can be hot plugged if and only if no vfio-ap device is attached
+to the bus already, whether via the QEMU command line or a prior hot plug
+action.
+
+To hot plug a vfio-ap device, use the QEMU device_add command:
+
+    (qemu) device_add vfio-ap,sysfsdev="$path-to-mdev"
+
+    Where the '$path-to-mdev' value specifies the absolute path to a mediated
+    device to which AP resources to be used by the guest have been assigned.
+
+Note that on Linux guests, the AP devices will be created in the
+/sys/bus/ap/devices directory when the AP bus subsequently performs its periodic
+scan, so there may be a short delay before the AP devices are accessible on the
+guest.
+
+The command will fail if:
+
+* A vfio-ap device has already been attached to the virtual machine's ap-bus.
+
+* The CPU model features for controlling guest access to AP facilities are not
+  enabled (see 'CPU model features' subsection in the previous section).
+
+Hot unplug a vfio-ap device from a running guest:
+================================================
+A vfio-ap device can be unplugged from a running KVM guest if a vfio-ap device
+has been attached to the virtual machine's ap-bus via the QEMU command line
+or a prior hot plug action.
+
+To hot unplug a vfio-ap device, use the QEMU device_del command:
+
+    (qemu) device_del vfio-ap,sysfsdev="$path-to-mdev"
+
+    Where $path-to-mdev is the same as the path specified when the vfio-ap
+    device was attached to the virtual machine's ap-bus.
+
+On a Linux guest, the AP devices will be removed from the /sys/bus/ap/devices
+directory on the guest when the AP bus subsequently performs its periodic scan,
+so there may be a short delay before the AP devices are no longer accessible by
+the guest.
+
+The command will fail if the $path-to-mdev specified on the device_del command
+does not match the value specified when the vfio-ap device was attached to
+the virtual machine's ap-bus.
+
 Example: Configure AP Matrixes for Three Linux Guests:
 =====================================================
 Let's now provide an example to illustrate how KVM guests may be given
@@ -819,7 +866,11 @@ Limitations
   assigned lest the host be given access to the private data of the AP queue
   device, such as a private key configured specifically for the guest.
 
-* Dynamically modifying the AP matrix for a running guest (which would amount to
-  hot(un)plug of AP devices for the guest) is currently not supported
+* Dynamically assigning AP resources to or unassigning AP resources from a
+  mediated matrix device - see 'Configuring an AP matrix for a linux guest'
+  section above - while a running guest is using it is currently not supported.
 
-* Live guest migration is not supported for guests using AP devices.
+* Live guest migration is not supported for guests using AP devices. If a guest
+  is using AP devices, the vfio-ap device configured for the guest must be
+  unplugged before migrating the guest (see 'Hot unplug a vfio-ap device from a
+  running guest' section above.
diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c
index 3795d30dd7..75ad0a66b9 100644
--- a/hw/s390x/ap-bridge.c
+++ b/hw/s390x/ap-bridge.c
@@ -39,6 +39,7 @@ static const TypeInfo ap_bus_info = {
 void s390_init_ap(void)
 {
     DeviceState *dev;
+    BusState *bus;
 
     /* If no AP instructions then no need for AP bridge */
     if (!s390_has_feat(S390_FEAT_AP)) {
@@ -52,13 +53,18 @@ void s390_init_ap(void)
     qdev_init_nofail(dev);
 
     /* Create bus on bridge device */
-    qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS);
+    bus = qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS);
+
+    /* Enable hotplugging */
+    qbus_set_hotplug_handler(bus, OBJECT(dev), &error_abort);
  }
 
 static void ap_bridge_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
+    hc->unplug = qdev_simple_device_unplug_cb;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 }
 
@@ -67,6 +73,10 @@ static const TypeInfo ap_bridge_info = {
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = 0,
     .class_init    = ap_bridge_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
 };
 
 static void ap_register(void)
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 6166ccd47a..d8b79ebe53 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -169,7 +169,7 @@ static void vfio_ap_class_init(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     dc->realize = vfio_ap_realize;
     dc->unrealize = vfio_ap_unrealize;
-    dc->hotpluggable = false;
+    dc->hotpluggable = true;
     dc->reset = vfio_ap_reset;
     dc->bus_type = TYPE_AP_BUS;
 }
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index 307ad61aee..0e467bf2b6 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -397,6 +397,11 @@ static uint32_t cc_calc_flogr(uint64_t dst)
     return dst ? 2 : 0;
 }
 
+static uint32_t cc_calc_lcbb(uint64_t dst)
+{
+    return dst == 16 ? 0 : 3;
+}
+
 static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
                                   uint64_t src, uint64_t dst, uint64_t vr)
 {
@@ -506,6 +511,9 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
     case CC_OP_FLOGR:
         r = cc_calc_flogr(dst);
         break;
+    case CC_OP_LCBB:
+        r = cc_calc_lcbb(dst);
+        break;
 
     case CC_OP_NZ_F32:
         r = set_cc_nz_f32(dst);
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index a758649f47..f84bfb1284 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -347,10 +347,41 @@ static void do_io_interrupt(CPUS390XState *env)
     load_psw(env, mask, addr);
 }
 
+typedef struct MchkExtSaveArea {
+    uint64_t    vregs[32][2];                     /* 0x0000 */
+    uint8_t     pad_0x0200[0x0400 - 0x0200];      /* 0x0200 */
+} MchkExtSaveArea;
+QEMU_BUILD_BUG_ON(sizeof(MchkExtSaveArea) != 1024);
+
+static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao)
+{
+    hwaddr len = sizeof(MchkExtSaveArea);
+    MchkExtSaveArea *sa;
+    int i;
+
+    sa = cpu_physical_memory_map(mcesao, &len, 1);
+    if (!sa) {
+        return -EFAULT;
+    }
+    if (len != sizeof(MchkExtSaveArea)) {
+        cpu_physical_memory_unmap(sa, len, 1, 0);
+        return -EFAULT;
+    }
+
+    for (i = 0; i < 32; i++) {
+        sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0].ll);
+        sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1].ll);
+    }
+
+    cpu_physical_memory_unmap(sa, len, 1, len);
+    return 0;
+}
+
 static void do_mchk_interrupt(CPUS390XState *env)
 {
     QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic());
-    uint64_t mask, addr;
+    uint64_t mcic = s390_build_validity_mcic() | MCIC_SC_CP;
+    uint64_t mask, addr, mcesao = 0;
     LowCore *lowcore;
     int i;
 
@@ -362,6 +393,17 @@ static void do_mchk_interrupt(CPUS390XState *env)
 
     lowcore = cpu_map_lowcore(env);
 
+    /* extended save area */
+    if (mcic & MCIC_VB_VR) {
+        /* length and alignment is 1024 bytes */
+        mcesao = be64_to_cpu(lowcore->mcesad) & ~0x3ffull;
+    }
+
+    /* try to store vector registers */
+    if (!mcesao || mchk_store_vregs(env, mcesao)) {
+        mcic &= ~MCIC_VB_VR;
+    }
+
     /* we are always in z/Architecture mode */
     lowcore->ar_access_id = 1;
 
@@ -377,7 +419,7 @@ static void do_mchk_interrupt(CPUS390XState *env)
     lowcore->cpu_timer_save_area = cpu_to_be64(env->cputm);
     lowcore->clock_comp_save_area = cpu_to_be64(env->ckc >> 8);
 
-    lowcore->mcic = cpu_to_be64(s390_build_validity_mcic() | MCIC_SC_CP);
+    lowcore->mcic = cpu_to_be64(mcic);
     lowcore->mcck_old_psw.mask = cpu_to_be64(get_psw_mask(env));
     lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr);
     mask = be64_to_cpu(lowcore->mcck_new_psw.mask);
diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index e921172bc4..1be68bafea 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -36,13 +36,21 @@
 
 #define RET128(F) (env->retxl = F.low, F.high)
 
-#define convert_bit(mask, from, to) \
-    (to < from                      \
-     ? (mask / (from / to)) & to    \
-     : (mask & from) * (to / from))
+uint8_t s390_softfloat_exc_to_ieee(unsigned int exc)
+{
+    uint8_t s390_exc = 0;
+
+    s390_exc |= (exc & float_flag_invalid) ? S390_IEEE_MASK_INVALID : 0;
+    s390_exc |= (exc & float_flag_divbyzero) ? S390_IEEE_MASK_DIVBYZERO : 0;
+    s390_exc |= (exc & float_flag_overflow) ? S390_IEEE_MASK_OVERFLOW : 0;
+    s390_exc |= (exc & float_flag_underflow) ? S390_IEEE_MASK_UNDERFLOW : 0;
+    s390_exc |= (exc & float_flag_inexact) ? S390_IEEE_MASK_INEXACT : 0;
+
+    return s390_exc;
+}
 
 /* Should be called after any operation that may raise IEEE exceptions.  */
-static void handle_exceptions(CPUS390XState *env, uintptr_t retaddr)
+static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr)
 {
     unsigned s390_exc, qemu_exc;
 
@@ -53,22 +61,54 @@ static void handle_exceptions(CPUS390XState *env, uintptr_t retaddr)
         return;
     }
     env->fpu_status.float_exception_flags = 0;
+    s390_exc = s390_softfloat_exc_to_ieee(qemu_exc);
+
+    /*
+     * IEEE-Underflow exception recognition exists if a tininess condition
+     * (underflow) exists and
+     * - The mask bit in the FPC is zero and the result is inexact
+     * - The mask bit in the FPC is one
+     * So tininess conditions that are not inexact don't trigger any
+     * underflow action in case the mask bit is not one.
+     */
+    if (!(s390_exc & S390_IEEE_MASK_INEXACT) &&
+        !((env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW)) {
+        s390_exc &= ~S390_IEEE_MASK_UNDERFLOW;
+    }
 
-    /* Convert softfloat exception bits to s390 exception bits.  */
-    s390_exc = 0;
-    s390_exc |= convert_bit(qemu_exc, float_flag_invalid, 0x80);
-    s390_exc |= convert_bit(qemu_exc, float_flag_divbyzero, 0x40);
-    s390_exc |= convert_bit(qemu_exc, float_flag_overflow, 0x20);
-    s390_exc |= convert_bit(qemu_exc, float_flag_underflow, 0x10);
-    s390_exc |= convert_bit(qemu_exc, float_flag_inexact, 0x08);
-
-    /* Install the exceptions that we raised.  */
-    env->fpc |= s390_exc << 16;
+    /*
+     * FIXME:
+     * 1. Right now, all inexact conditions are inidicated as
+     *    "truncated" (0) and never as "incremented" (1) in the DXC.
+     * 2. Only traps due to invalid/divbyzero are suppressing. Other traps
+     *    are completing, meaning the target register has to be written!
+     *    This, however will mean that we have to write the register before
+     *    triggering the trap - impossible right now.
+     */
+
+    /*
+     * invalid/divbyzero cannot coexist with other conditions.
+     * overflow/underflow however can coexist with inexact, we have to
+     * handle it separatly.
+     */
+    if (s390_exc & ~S390_IEEE_MASK_INEXACT) {
+        if (s390_exc & ~S390_IEEE_MASK_INEXACT & env->fpc >> 24) {
+            /* trap condition - inexact reported along */
+            tcg_s390_data_exception(env, s390_exc, retaddr);
+        }
+        /* nontrap condition - inexact handled differently */
+        env->fpc |= (s390_exc & ~S390_IEEE_MASK_INEXACT) << 16;
+    }
 
-    /* Send signals for enabled exceptions.  */
-    s390_exc &= env->fpc >> 24;
-    if (s390_exc) {
-        tcg_s390_data_exception(env, s390_exc, retaddr);
+    /* inexact handling */
+    if (s390_exc & S390_IEEE_MASK_INEXACT && !XxC) {
+        /* trap condition - overflow/underflow _not_ reported along */
+        if (s390_exc & S390_IEEE_MASK_INEXACT & env->fpc >> 24) {
+            tcg_s390_data_exception(env, s390_exc & S390_IEEE_MASK_INEXACT,
+                                    retaddr);
+        }
+        /* nontrap condition */
+        env->fpc |= (s390_exc & S390_IEEE_MASK_INEXACT) << 16;
     }
 }
 
@@ -130,11 +170,22 @@ uint32_t set_cc_nz_f128(float128 v)
     }
 }
 
+static inline uint8_t round_from_m34(uint32_t m34)
+{
+    return extract32(m34, 0, 4);
+}
+
+static inline bool xxc_from_m34(uint32_t m34)
+{
+    /* XxC is bit 1 of m4 */
+    return extract32(m34, 4 + 3 - 1, 1);
+}
+
 /* 32-bit FP addition */
 uint64_t HELPER(aeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float32 ret = float32_add(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -142,7 +193,7 @@ uint64_t HELPER(aeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float64 ret = float64_add(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -153,7 +204,7 @@ uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
     float128 ret = float128_add(make_float128(ah, al),
                                 make_float128(bh, bl),
                                 &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
@@ -161,7 +212,7 @@ uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
 uint64_t HELPER(seb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float32 ret = float32_sub(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -169,7 +220,7 @@ uint64_t HELPER(seb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float64 ret = float64_sub(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -180,7 +231,7 @@ uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
     float128 ret = float128_sub(make_float128(ah, al),
                                 make_float128(bh, bl),
                                 &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
@@ -188,7 +239,7 @@ uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
 uint64_t HELPER(deb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float32 ret = float32_div(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -196,7 +247,7 @@ uint64_t HELPER(deb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float64 ret = float64_div(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -207,7 +258,7 @@ uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
     float128 ret = float128_div(make_float128(ah, al),
                                 make_float128(bh, bl),
                                 &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
@@ -215,7 +266,7 @@ uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
 uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float32 ret = float32_mul(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -223,7 +274,7 @@ uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 uint64_t HELPER(mdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float64 ret = float64_mul(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -232,7 +283,7 @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     float64 ret = float32_to_float64(f2, &env->fpu_status);
     ret = float64_mul(f1, ret, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -243,7 +294,7 @@ uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
     float128 ret = float128_mul(make_float128(ah, al),
                                 make_float128(bh, bl),
                                 &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
@@ -253,7 +304,7 @@ uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al,
 {
     float128 ret = float64_to_float128(f2, &env->fpu_status);
     ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
@@ -261,15 +312,19 @@ uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al,
 uint64_t HELPER(ldeb)(CPUS390XState *env, uint64_t f2)
 {
     float64 ret = float32_to_float64(f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
 /* convert 128-bit float to 64-bit float */
-uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
+uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
+                      uint32_t m34)
 {
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float64 ret = float128_to_float64(make_float128(ah, al), &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
@@ -277,7 +332,7 @@ uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
 uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
 {
     float128 ret = float64_to_float128(f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
@@ -285,23 +340,30 @@ uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
 uint64_t HELPER(lxeb)(CPUS390XState *env, uint64_t f2)
 {
     float128 ret = float32_to_float128(f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
 /* convert 64-bit float to 32-bit float */
-uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2)
+uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
 {
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float32 ret = float64_to_float32(f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 128-bit float to 32-bit float */
-uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al)
+uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al,
+                      uint32_t m34)
 {
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float32 ret = float128_to_float32(make_float128(ah, al), &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
@@ -309,7 +371,7 @@ uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al)
 uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     int cmp = float32_compare_quiet(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return float_comp_to_cc(env, cmp);
 }
 
@@ -317,7 +379,7 @@ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     int cmp = float64_compare_quiet(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return float_comp_to_cc(env, cmp);
 }
 
@@ -328,21 +390,28 @@ uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
     int cmp = float128_compare_quiet(make_float128(ah, al),
                                      make_float128(bh, bl),
                                      &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return float_comp_to_cc(env, cmp);
 }
 
-static int swap_round_mode(CPUS390XState *env, int m3)
+int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3)
 {
     int ret = env->fpu_status.float_rounding_mode;
+
     switch (m3) {
     case 0:
         /* current mode */
         break;
     case 1:
-        /* biased round no nearest */
+        /* round to nearest with ties away from 0 */
+        set_float_rounding_mode(float_round_ties_away, &env->fpu_status);
+        break;
+    case 3:
+        /* round to prepare for shorter precision */
+        set_float_rounding_mode(float_round_to_odd, &env->fpu_status);
+        break;
     case 4:
-        /* round to nearest */
+        /* round to nearest with ties to even */
         set_float_rounding_mode(float_round_nearest_even, &env->fpu_status);
         break;
     case 5:
@@ -357,226 +426,251 @@ static int swap_round_mode(CPUS390XState *env, int m3)
         /* round to -inf */
         set_float_rounding_mode(float_round_down, &env->fpu_status);
         break;
+    default:
+        g_assert_not_reached();
     }
     return ret;
 }
 
+void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode)
+{
+    set_float_rounding_mode(old_mode, &env->fpu_status);
+}
+
 /* convert 64-bit int to 32-bit float */
-uint64_t HELPER(cegb)(CPUS390XState *env, int64_t v2, uint32_t m3)
+uint64_t HELPER(cegb)(CPUS390XState *env, int64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float32 ret = int64_to_float32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit int to 64-bit float */
-uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m3)
+uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float64 ret = int64_to_float64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit int to 128-bit float */
-uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m3)
+uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float128 ret = int64_to_float128(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return RET128(ret);
 }
 
 /* convert 64-bit uint to 32-bit float */
-uint64_t HELPER(celgb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(celgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float32 ret = uint64_to_float32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit uint to 64-bit float */
-uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float64 ret = uint64_to_float64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit uint to 128-bit float */
-uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float128 ret = uint64_to_float128(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return RET128(ret);
 }
 
 /* convert 32-bit float to 64-bit int */
-uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     int64_t ret = float32_to_int64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit float to 64-bit int */
-uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     int64_t ret = float64_to_int64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 128-bit float to 64-bit int */
-uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m3)
+uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float128 v2 = make_float128(h, l);
     int64_t ret = float128_to_int64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 32-bit float to 32-bit int */
-uint64_t HELPER(cfeb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(cfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     int32_t ret = float32_to_int32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit float to 32-bit int */
-uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     int32_t ret = float64_to_int32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 128-bit float to 32-bit int */
-uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m3)
+uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float128 v2 = make_float128(h, l);
     int32_t ret = float128_to_int32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 32-bit float to 64-bit uint */
-uint64_t HELPER(clgeb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(clgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     uint64_t ret;
+
     v2 = float32_to_float64(v2, &env->fpu_status);
     ret = float64_to_uint64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit float to 64-bit uint */
-uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     uint64_t ret = float64_to_uint64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 128-bit float to 64-bit uint */
-uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m3)
+uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
-    float128 v2 = make_float128(h, l);
-    /* ??? Not 100% correct.  */
-    uint64_t ret = float128_to_int64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
+    uint64_t ret = float128_to_uint64(make_float128(h, l), &env->fpu_status);
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 32-bit float to 32-bit uint */
-uint64_t HELPER(clfeb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(clfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     uint32_t ret = float32_to_uint32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 64-bit float to 32-bit uint */
-uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m3)
+uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     uint32_t ret = float64_to_uint32(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* convert 128-bit float to 32-bit uint */
-uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m3)
+uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
-    float128 v2 = make_float128(h, l);
-    /* Not 100% correct.  */
-    uint32_t ret = float128_to_int64(v2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
+    uint32_t ret = float128_to_uint32(make_float128(h, l), &env->fpu_status);
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* round to integer 32-bit */
-uint64_t HELPER(fieb)(CPUS390XState *env, uint64_t f2, uint32_t m3)
+uint64_t HELPER(fieb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float32 ret = float32_round_to_int(f2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* round to integer 64-bit */
-uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m3)
+uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float64 ret = float64_round_to_int(f2, &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return ret;
 }
 
 /* round to integer 128-bit */
-uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m3)
+uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al,
+                      uint32_t m34)
 {
-    int hold = swap_round_mode(env, m3);
+    int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
     float128 ret = float128_round_to_int(make_float128(ah, al),
                                          &env->fpu_status);
-    set_float_rounding_mode(hold, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_exceptions(env, xxc_from_m34(m34), GETPC());
     return RET128(ret);
 }
 
@@ -584,7 +678,7 @@ uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m3)
 uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     int cmp = float32_compare(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return float_comp_to_cc(env, cmp);
 }
 
@@ -592,7 +686,7 @@ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
     int cmp = float64_compare(f1, f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return float_comp_to_cc(env, cmp);
 }
 
@@ -603,7 +697,7 @@ uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
     int cmp = float128_compare(make_float128(ah, al),
                                make_float128(bh, bl),
                                &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return float_comp_to_cc(env, cmp);
 }
 
@@ -612,7 +706,7 @@ uint64_t HELPER(maeb)(CPUS390XState *env, uint64_t f1,
                       uint64_t f2, uint64_t f3)
 {
     float32 ret = float32_muladd(f2, f3, f1, 0, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -621,7 +715,7 @@ uint64_t HELPER(madb)(CPUS390XState *env, uint64_t f1,
                       uint64_t f2, uint64_t f3)
 {
     float64 ret = float64_muladd(f2, f3, f1, 0, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -631,7 +725,7 @@ uint64_t HELPER(mseb)(CPUS390XState *env, uint64_t f1,
 {
     float32 ret = float32_muladd(f2, f3, f1, float_muladd_negate_c,
                                  &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -641,78 +735,63 @@ uint64_t HELPER(msdb)(CPUS390XState *env, uint64_t f1,
 {
     float64 ret = float64_muladd(f2, f3, f1, float_muladd_negate_c,
                                  &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
+/* The rightmost bit has the number 11. */
+static inline uint16_t dcmask(int bit, bool neg)
+{
+    return 1 << (11 - bit - neg);
+}
+
+#define DEF_FLOAT_DCMASK(_TYPE) \
+static uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1)       \
+{                                                                  \
+    const bool neg = _TYPE##_is_neg(f1);                           \
+                                                                   \
+    /* Sorted by most common cases - only one class is possible */ \
+    if (_TYPE##_is_normal(f1)) {                                   \
+        return dcmask(2, neg);                                     \
+    } else if (_TYPE##_is_zero(f1)) {                              \
+        return dcmask(0, neg);                                     \
+    } else if (_TYPE##_is_denormal(f1)) {                          \
+        return dcmask(4, neg);                                     \
+    } else if (_TYPE##_is_infinity(f1)) {                          \
+        return dcmask(6, neg);                                     \
+    } else if (_TYPE##_is_quiet_nan(f1, &env->fpu_status)) {       \
+        return dcmask(8, neg);                                     \
+    }                                                              \
+    /* signaling nan, as last remaining case */                    \
+    return dcmask(10, neg);                                        \
+}
+DEF_FLOAT_DCMASK(float32)
+DEF_FLOAT_DCMASK(float64)
+DEF_FLOAT_DCMASK(float128)
+
 /* test data class 32-bit */
 uint32_t HELPER(tceb)(CPUS390XState *env, uint64_t f1, uint64_t m2)
 {
-    float32 v1 = f1;
-    int neg = float32_is_neg(v1);
-    uint32_t cc = 0;
-
-    if ((float32_is_zero(v1) && (m2 & (1 << (11-neg)))) ||
-        (float32_is_infinity(v1) && (m2 & (1 << (5-neg)))) ||
-        (float32_is_any_nan(v1) && (m2 & (1 << (3-neg)))) ||
-        (float32_is_signaling_nan(v1, &env->fpu_status) &&
-         (m2 & (1 << (1-neg))))) {
-        cc = 1;
-    } else if (m2 & (1 << (9-neg))) {
-        /* assume normalized number */
-        cc = 1;
-    }
-    /* FIXME: denormalized? */
-    return cc;
+    return (m2 & float32_dcmask(env, f1)) != 0;
 }
 
 /* test data class 64-bit */
 uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2)
 {
-    int neg = float64_is_neg(v1);
-    uint32_t cc = 0;
-
-    if ((float64_is_zero(v1) && (m2 & (1 << (11-neg)))) ||
-        (float64_is_infinity(v1) && (m2 & (1 << (5-neg)))) ||
-        (float64_is_any_nan(v1) && (m2 & (1 << (3-neg)))) ||
-        (float64_is_signaling_nan(v1, &env->fpu_status) &&
-         (m2 & (1 << (1-neg))))) {
-        cc = 1;
-    } else if (m2 & (1 << (9-neg))) {
-        /* assume normalized number */
-        cc = 1;
-    }
-    /* FIXME: denormalized? */
-    return cc;
+    return (m2 & float64_dcmask(env, v1)) != 0;
 }
 
 /* test data class 128-bit */
-uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah,
-                      uint64_t al, uint64_t m2)
-{
-    float128 v1 = make_float128(ah, al);
-    int neg = float128_is_neg(v1);
-    uint32_t cc = 0;
-
-    if ((float128_is_zero(v1) && (m2 & (1 << (11-neg)))) ||
-        (float128_is_infinity(v1) && (m2 & (1 << (5-neg)))) ||
-        (float128_is_any_nan(v1) && (m2 & (1 << (3-neg)))) ||
-        (float128_is_signaling_nan(v1, &env->fpu_status) &&
-         (m2 & (1 << (1-neg))))) {
-        cc = 1;
-    } else if (m2 & (1 << (9-neg))) {
-        /* assume normalized number */
-        cc = 1;
-    }
-    /* FIXME: denormalized? */
-    return cc;
+uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2)
+{
+    return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0;
 }
 
 /* square root 32-bit */
 uint64_t HELPER(sqeb)(CPUS390XState *env, uint64_t f2)
 {
     float32 ret = float32_sqrt(f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -720,7 +799,7 @@ uint64_t HELPER(sqeb)(CPUS390XState *env, uint64_t f2)
 uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
 {
     float64 ret = float64_sqrt(f2, &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return ret;
 }
 
@@ -728,44 +807,84 @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
 uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
 {
     float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
-    handle_exceptions(env, GETPC());
+    handle_exceptions(env, false, GETPC());
     return RET128(ret);
 }
 
-static const int fpc_to_rnd[4] = {
+static const int fpc_to_rnd[8] = {
     float_round_nearest_even,
     float_round_to_zero,
     float_round_up,
-    float_round_down
+    float_round_down,
+    -1,
+    -1,
+    -1,
+    float_round_to_odd,
 };
 
 /* set fpc */
 void HELPER(sfpc)(CPUS390XState *env, uint64_t fpc)
 {
+    if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u ||
+        (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) {
+        s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC());
+    }
+
     /* Install everything in the main FPC.  */
     env->fpc = fpc;
 
     /* Install the rounding mode in the shadow fpu_status.  */
-    set_float_rounding_mode(fpc_to_rnd[fpc & 3], &env->fpu_status);
+    set_float_rounding_mode(fpc_to_rnd[fpc & 0x7], &env->fpu_status);
 }
 
 /* set fpc and signal */
-void HELPER(sfas)(CPUS390XState *env, uint64_t val)
+void HELPER(sfas)(CPUS390XState *env, uint64_t fpc)
 {
     uint32_t signalling = env->fpc;
-    uint32_t source = val;
     uint32_t s390_exc;
 
-    /* The contents of the source operand are placed in the FPC register;
-       then the flags in the FPC register are set to the logical OR of the
-       signalling flags and the source flags.  */
-    env->fpc = source | (signalling & 0x00ff0000);
-    set_float_rounding_mode(fpc_to_rnd[source & 3], &env->fpu_status);
+    if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u ||
+        (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) {
+        s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC());
+    }
 
-    /* If any signalling flag is 1 and the corresponding source mask
-       is also 1, a simulated-iee-exception trap occurs.  */
-    s390_exc = (signalling >> 16) & (source >> 24);
+    /*
+     * FPC is set to the FPC operand with a bitwise OR of the signalling
+     * flags.
+     */
+    env->fpc = fpc | (signalling & 0x00ff0000);
+    set_float_rounding_mode(fpc_to_rnd[fpc & 0x7], &env->fpu_status);
+
+    /*
+     * If any signaling flag is enabled in the new FPC mask, a
+     * simulated-iee-exception exception occurs.
+     */
+    s390_exc = (signalling >> 16) & (fpc >> 24);
     if (s390_exc) {
+        if (s390_exc & S390_IEEE_MASK_INVALID) {
+            s390_exc = S390_IEEE_MASK_INVALID;
+        } else if (s390_exc & S390_IEEE_MASK_DIVBYZERO) {
+            s390_exc = S390_IEEE_MASK_DIVBYZERO;
+        } else if (s390_exc & S390_IEEE_MASK_OVERFLOW) {
+            s390_exc &= (S390_IEEE_MASK_OVERFLOW | S390_IEEE_MASK_INEXACT);
+        } else if (s390_exc & S390_IEEE_MASK_UNDERFLOW) {
+            s390_exc &= (S390_IEEE_MASK_UNDERFLOW | S390_IEEE_MASK_INEXACT);
+        } else if (s390_exc & S390_IEEE_MASK_INEXACT) {
+            s390_exc = S390_IEEE_MASK_INEXACT;
+        } else if (s390_exc & S390_IEEE_MASK_QUANTUM) {
+            s390_exc = S390_IEEE_MASK_QUANTUM;
+        }
         tcg_s390_data_exception(env, s390_exc | 3, GETPC());
     }
 }
+
+/* set bfp rounding mode */
+void HELPER(srnm)(CPUS390XState *env, uint64_t rnd)
+{
+    if (rnd > 0x7 || fpc_to_rnd[rnd & 0x7] == -1) {
+        s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC());
+    }
+
+    env->fpc = deposit32(env->fpc, 0, 3, rnd);
+    set_float_rounding_mode(fpc_to_rnd[rnd & 0x7], &env->fpu_status);
+}
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 44eca45474..e4739a6b9f 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -601,6 +601,11 @@ static uint16_t qemu_V3_1[] = {
 };
 
 static uint16_t qemu_LATEST[] = {
+    /*
+     * Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not
+     * implemented yet).
+     */
+    S390_FEAT_FLOATING_POINT_EXT,
     S390_FEAT_ZPCI,
 };
 
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 3d74836a83..8e9573221c 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -211,7 +211,7 @@ void s390_cpu_recompute_watchpoints(CPUState *cs)
     }
 }
 
-struct sigp_save_area {
+typedef struct SigpSaveArea {
     uint64_t    fprs[16];                       /* 0x0000 */
     uint64_t    grs[16];                        /* 0x0080 */
     PSW         psw;                            /* 0x0100 */
@@ -225,13 +225,13 @@ struct sigp_save_area {
     uint8_t     pad_0x0138[0x0140 - 0x0138];    /* 0x0138 */
     uint32_t    ars[16];                        /* 0x0140 */
     uint64_t    crs[16];                        /* 0x0384 */
-};
-QEMU_BUILD_BUG_ON(sizeof(struct sigp_save_area) != 512);
+} SigpSaveArea;
+QEMU_BUILD_BUG_ON(sizeof(SigpSaveArea) != 512);
 
 int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
 {
     static const uint8_t ar_id = 1;
-    struct sigp_save_area *sa;
+    SigpSaveArea *sa;
     hwaddr len = sizeof(*sa);
     int i;
 
@@ -272,32 +272,43 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
     return 0;
 }
 
-#define ADTL_GS_OFFSET   1024 /* offset of GS data in adtl save area */
+typedef struct SigpAdtlSaveArea {
+    uint64_t    vregs[32][2];                     /* 0x0000 */
+    uint8_t     pad_0x0200[0x0400 - 0x0200];      /* 0x0200 */
+    uint64_t    gscb[4];                          /* 0x0400 */
+    uint8_t     pad_0x0420[0x1000 - 0x0420];      /* 0x0420 */
+} SigpAdtlSaveArea;
+QEMU_BUILD_BUG_ON(sizeof(SigpAdtlSaveArea) != 4096);
+
 #define ADTL_GS_MIN_SIZE 2048 /* minimal size of adtl save area for GS */
 int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len)
 {
+    SigpAdtlSaveArea *sa;
     hwaddr save = len;
-    void *mem;
+    int i;
 
-    mem = cpu_physical_memory_map(addr, &save, 1);
-    if (!mem) {
+    sa = cpu_physical_memory_map(addr, &save, 1);
+    if (!sa) {
         return -EFAULT;
     }
     if (save != len) {
-        cpu_physical_memory_unmap(mem, len, 1, 0);
+        cpu_physical_memory_unmap(sa, len, 1, 0);
         return -EFAULT;
     }
 
-    /* FIXME: as soon as TCG supports these features, convert cpu->be */
     if (s390_has_feat(S390_FEAT_VECTOR)) {
-        memcpy(mem, &cpu->env.vregs, 512);
+        for (i = 0; i < 32; i++) {
+            sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0].ll);
+            sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+        }
     }
     if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) {
-        memcpy(mem + ADTL_GS_OFFSET, &cpu->env.gscb, 32);
+        for (i = 0; i < 4; i++) {
+            sa->gscb[i] = cpu_to_be64(cpu->env.gscb[i]);
+        }
     }
 
-    cpu_physical_memory_unmap(mem, len, 1, len);
-
+    cpu_physical_memory_unmap(sa, len, 1, len);
     return 0;
 }
 #endif /* CONFIG_USER_ONLY */
@@ -406,6 +417,7 @@ const char *cc_name(enum cc_op cc_op)
         [CC_OP_SLA_32]    = "CC_OP_SLA_32",
         [CC_OP_SLA_64]    = "CC_OP_SLA_64",
         [CC_OP_FLOGR]     = "CC_OP_FLOGR",
+        [CC_OP_LCBB]      = "CC_OP_LCBB",
     };
 
     return cc_names[cc_op];
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 6260b50496..bb659257f6 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -53,11 +53,11 @@ DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64)
-DEF_HELPER_FLAGS_3(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
 DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i64, env, i64)
-DEF_HELPER_FLAGS_2(ledb, TCG_CALL_NO_WG, i64, env, i64)
-DEF_HELPER_FLAGS_3(lexb, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
 DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
 DEF_HELPER_FLAGS_3(cdb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
 DEF_HELPER_FLAGS_5(cxb, TCG_CALL_NO_WG_SE, i32, env, i64, i64, i64, i64)
@@ -104,8 +104,9 @@ DEF_HELPER_4(trtr, i32, env, i32, i64, i64)
 DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32)
 DEF_HELPER_4(cksm, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_NO_RWG_SE, i32, env, i32, i64, i64, i64)
-DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_RWG, void, env, i64)
+DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(srnm, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_2(stfle, i32, env, i64)
 DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 61582372ab..61b750a855 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -479,6 +479,8 @@
     F(0xb313, LCDBR,   RRE,   Z,   0, f2, new, f1, negf64, f64, IF_BFP)
     F(0xb343, LCXBR,   RRE,   Z,   x2h, x2l, new_P, x1, negf128, f128, IF_BFP)
     F(0xb373, LCDFR,   RRE,   FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD COUNT TO BLOCK BOUNDARY */
+    C(0xe727, LCBB,    RXE,   V,   la2, 0, r1, 0, lcbb, 0)
 /* LOAD HALFWORD */
     C(0xb927, LHR,     RRE,   EI,  0, r2_16s, 0, r1_32, mov2, 0)
     C(0xb907, LGHR,    RRE,   EI,  0, r2_16s, 0, r1, mov2, 0)
@@ -598,10 +600,12 @@
     F(0xed04, LDEB,    RXE,   Z,   0, m2_32u, new, f1, ldeb, 0, IF_BFP)
     F(0xed05, LXDB,    RXE,   Z,   0, m2_64, new_P, x1, lxdb, 0, IF_BFP)
     F(0xed06, LXEB,    RXE,   Z,   0, m2_32u, new_P, x1, lxeb, 0, IF_BFP)
+    F(0xb324, LDER,    RXE,   Z,   0, e2, new, f1, lde, 0, IF_AFP1)
+    F(0xed24, LDE,     RXE,   Z,   0, m2_32u, new, f1, lde, 0, IF_AFP1)
 /* LOAD ROUNDED */
-    F(0xb344, LEDBR,   RRE,   Z,   0, f2, new, e1, ledb, 0, IF_BFP)
-    F(0xb345, LDXBR,   RRE,   Z,   x2h, x2l, new, f1, ldxb, 0, IF_BFP)
-    F(0xb346, LEXBR,   RRE,   Z,   x2h, x2l, new, e1, lexb, 0, IF_BFP)
+    F(0xb344, LEDBR,   RRF_e, Z,   0, f2, new, e1, ledb, 0, IF_BFP)
+    F(0xb345, LDXBR,   RRF_e, Z,   x2h, x2l, new, f1, ldxb, 0, IF_BFP)
+    F(0xb346, LEXBR,   RRF_e, Z,   x2h, x2l, new, e1, lexb, 0, IF_BFP)
 
 /* LOAD MULTIPLE */
     C(0x9800, LM,      RS_a,  Z,   0, a2, 0, 0, lm32, 0)
@@ -759,10 +763,10 @@
 /* SET FPC AND SIGNAL */
     F(0xb385, SFASR,   RRE,   IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0, IF_DFP)
 /* SET BFP ROUNDING MODE */
-    F(0xb299, SRNM,    S,     Z,   0, 0, 0, 0, srnm, 0, IF_BFP)
-    F(0xb2b8, SRNMB,   S,     FPE, 0, 0, 0, 0, srnm, 0, IF_BFP)
+    F(0xb299, SRNM,    S,     Z,   la2, 0, 0, 0, srnm, 0, IF_BFP)
+    F(0xb2b8, SRNMB,   S,     FPE, la2, 0, 0, 0, srnmb, 0, IF_BFP)
 /* SET DFP ROUNDING MODE */
-    F(0xb2b9, SRNMT,   S,     DFPR, 0, 0, 0, 0, srnm, 0, IF_DFP)
+    F(0xb2b9, SRNMT,   S,     DFPR, la2, 0, 0, 0, srnmt, 0, IF_DFP)
 /* SET PROGRAM MASK */
     C(0x0400, SPM,     RR_a,  Z,   r1, 0, 0, 0, spm, 0)
 
diff --git a/target/s390x/insn-format.def b/target/s390x/insn-format.def
index a412d90fb7..4297ff4165 100644
--- a/target/s390x/insn-format.def
+++ b/target/s390x/insn-format.def
@@ -36,7 +36,7 @@ F3(RSY_a, R(1, 8),     BDL(2),      R(3,12))
 F3(RSY_b, R(1, 8),     BDL(2),      M(3,12))
 F2(RX_a,  R(1, 8),     BXD(2))
 F2(RX_b,  M(1, 8),     BXD(2))
-F2(RXE,   R(1, 8),     BXD(2))
+F3(RXE,   R(1, 8),     BXD(2),      M(3,32))
 F3(RXF,   R(1,32),     BXD(2),      R(3, 8))
 F2(RXY_a, R(1, 8),     BXDL(2))
 F2(RXY_b, M(1, 8),     BXDL(2))
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index f2a771e2b4..7baf0e2404 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -101,7 +101,9 @@ typedef struct LowCore {
     /* whether the kernel died with panic() or not */
     uint32_t        panic_magic;              /* 0xe00 */
 
-    uint8_t         pad13[0x11b8 - 0xe04];    /* 0xe04 */
+    uint8_t         pad13[0x11b0 - 0xe04];    /* 0xe04 */
+
+    uint64_t        mcesad;                    /* 0x11B0 */
 
     /* 64 bit extparam used for pfault, diag 250 etc  */
     uint64_t        ext_params2;               /* 0x11B8 */
@@ -234,6 +236,7 @@ enum cc_op {
     CC_OP_SLA_32,               /* Calculate shift left signed (32bit) */
     CC_OP_SLA_64,               /* Calculate shift left signed (64bit) */
     CC_OP_FLOGR,                /* find leftmost one */
+    CC_OP_LCBB,                 /* load count to block boundary */
     CC_OP_MAX
 };
 
@@ -308,6 +311,15 @@ void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
 uint32_t set_cc_nz_f32(float32 v);
 uint32_t set_cc_nz_f64(float64 v);
 uint32_t set_cc_nz_f128(float128 v);
+#define S390_IEEE_MASK_INVALID   0x80
+#define S390_IEEE_MASK_DIVBYZERO 0x40
+#define S390_IEEE_MASK_OVERFLOW  0x20
+#define S390_IEEE_MASK_UNDERFLOW 0x10
+#define S390_IEEE_MASK_INEXACT   0x08
+#define S390_IEEE_MASK_QUANTUM   0x04
+uint8_t s390_softfloat_exc_to_ieee(unsigned int exc);
+int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3);
+void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode);
 
 
 /* gdbstub.c */
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 19072efec6..41fb466bb4 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -145,12 +145,18 @@ void s390x_translate_init(void)
     }
 }
 
-static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp size)
+static inline int vec_full_reg_offset(uint8_t reg)
 {
-    const uint8_t es = 1 << size;
-    int offs = enr * es;
-
     g_assert(reg < 32);
+    return offsetof(CPUS390XState, vregs[reg][0].d);
+}
+
+static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp es)
+{
+    /* Convert element size (es) - e.g. MO_8 - to bytes */
+    const uint8_t bytes = 1 << es;
+    int offs = enr * bytes;
+
     /*
      * vregs[n][0] is the lowest 8 byte and vregs[n][1] the highest 8 byte
      * of the 16 byte vector, on both, little and big endian systems.
@@ -173,11 +179,11 @@ static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp size)
      * the two 8 byte elements have to be loaded separately. Let's force all
      * 16 byte operations to handle it in a special way.
      */
-    g_assert(size <= MO_64);
+    g_assert(es <= MO_64);
 #ifndef HOST_WORDS_BIGENDIAN
-    offs ^= (8 - es);
+    offs ^= (8 - bytes);
 #endif
-    return offs + offsetof(CPUS390XState, vregs[reg][0].d);
+    return offs + vec_full_reg_offset(reg);
 }
 
 static inline int freg64_offset(uint8_t reg)
@@ -376,32 +382,43 @@ static inline void gen_trap(DisasContext *s)
     gen_data_exception(0xff);
 }
 
+static void gen_addi_and_wrap_i64(DisasContext *s, TCGv_i64 dst, TCGv_i64 src,
+                                  int64_t imm)
+{
+    tcg_gen_addi_i64(dst, src, imm);
+    if (!(s->base.tb->flags & FLAG_MASK_64)) {
+        if (s->base.tb->flags & FLAG_MASK_32) {
+            tcg_gen_andi_i64(dst, dst, 0x7fffffff);
+        } else {
+            tcg_gen_andi_i64(dst, dst, 0x00ffffff);
+        }
+    }
+}
+
 static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2)
 {
     TCGv_i64 tmp = tcg_temp_new_i64();
-    bool need_31 = !(s->base.tb->flags & FLAG_MASK_64);
-
-    /* Note that d2 is limited to 20 bits, signed.  If we crop negative
-       displacements early we create larger immedate addends.  */
 
-    /* Note that addi optimizes the imm==0 case.  */
+    /*
+     * Note that d2 is limited to 20 bits, signed.  If we crop negative
+     * displacements early we create larger immedate addends.
+     */
     if (b2 && x2) {
         tcg_gen_add_i64(tmp, regs[b2], regs[x2]);
-        tcg_gen_addi_i64(tmp, tmp, d2);
+        gen_addi_and_wrap_i64(s, tmp, tmp, d2);
     } else if (b2) {
-        tcg_gen_addi_i64(tmp, regs[b2], d2);
+        gen_addi_and_wrap_i64(s, tmp, regs[b2], d2);
     } else if (x2) {
-        tcg_gen_addi_i64(tmp, regs[x2], d2);
-    } else {
-        if (need_31) {
-            d2 &= 0x7fffffff;
-            need_31 = false;
+        gen_addi_and_wrap_i64(s, tmp, regs[x2], d2);
+    } else if (!(s->base.tb->flags & FLAG_MASK_64)) {
+        if (s->base.tb->flags & FLAG_MASK_32) {
+            tcg_gen_movi_i64(tmp, d2 & 0x7fffffff);
+        } else {
+            tcg_gen_movi_i64(tmp, d2 & 0x00ffffff);
         }
+    } else {
         tcg_gen_movi_i64(tmp, d2);
     }
-    if (need_31) {
-        tcg_gen_andi_i64(tmp, tmp, 0x7fffffff);
-    }
 
     return tmp;
 }
@@ -540,6 +557,7 @@ static void gen_op_calc_cc(DisasContext *s)
     case CC_OP_NZ_F32:
     case CC_OP_NZ_F64:
     case CC_OP_FLOGR:
+    case CC_OP_LCBB:
         /* 1 argument */
         gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, dummy, cc_dst, dummy);
         break;
@@ -1758,160 +1776,257 @@ static DisasJumpType op_cxb(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static TCGv_i32 fpinst_extract_m34(DisasContext *s, bool m3_with_fpe,
+                                   bool m4_with_fpe)
+{
+    const bool fpe = s390_has_feat(S390_FEAT_FLOATING_POINT_EXT);
+    uint8_t m3 = get_field(s->fields, m3);
+    uint8_t m4 = get_field(s->fields, m4);
+
+    /* m3 field was introduced with FPE */
+    if (!fpe && m3_with_fpe) {
+        m3 = 0;
+    }
+    /* m4 field was introduced with FPE */
+    if (!fpe && m4_with_fpe) {
+        m4 = 0;
+    }
+
+    /* Check for valid rounding modes. Mode 3 was introduced later. */
+    if (m3 == 2 || m3 > 7 || (!fpe && m3 == 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return NULL;
+    }
+
+    return tcg_const_i32(deposit32(m3, 4, 4, m4));
+}
+
 static DisasJumpType op_cfeb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cfeb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cfeb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f32(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cfdb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cfdb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cfdb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f64(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f128(s, o->in1, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cgeb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cgeb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cgeb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f32(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cgdb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cgdb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cgdb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f64(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f128(s, o->in1, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_clfeb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_clfeb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_clfeb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f32(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_clfdb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_clfdb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_clfdb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f64(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f128(s, o->in1, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_clgeb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_clgeb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_clgeb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f32(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_clgdb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_clgdb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_clgdb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f64(s, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34);
+    tcg_temp_free_i32(m34);
     gen_set_cc_nz_f128(s, o->in1, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cegb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cegb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, true, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cegb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cdgb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cdgb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, true, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cdgb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cxgb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cxgb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, true, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cxgb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return_low128(o->out2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_celgb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_celgb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_celgb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cdlgb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cdlgb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cdlgb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_cxlgb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, false);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_cxlgb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return_low128(o->out2);
     return DISAS_NEXT;
 }
@@ -2390,26 +2505,38 @@ static DisasJumpType op_ex(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_fieb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_fieb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_fieb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_fidb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_fidb(o->out, cpu_env, o->in2, m3);
-    tcg_temp_free_i32(m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_fidb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_fixb(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3));
-    gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m3);
+    TCGv_i32 m34 = fpinst_extract_m34(s, false, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m34);
     return_low128(o->out2);
-    tcg_temp_free_i32(m3);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
@@ -2678,19 +2805,37 @@ static DisasJumpType op_ldeb(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_ledb(DisasContext *s, DisasOps *o)
 {
-    gen_helper_ledb(o->out, cpu_env, o->in2);
+    TCGv_i32 m34 = fpinst_extract_m34(s, true, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_ledb(o->out, cpu_env, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_ldxb(DisasContext *s, DisasOps *o)
 {
-    gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2);
+    TCGv_i32 m34 = fpinst_extract_m34(s, true, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_lexb(DisasContext *s, DisasOps *o)
 {
-    gen_helper_lexb(o->out, cpu_env, o->in1, o->in2);
+    TCGv_i32 m34 = fpinst_extract_m34(s, true, true);
+
+    if (!m34) {
+        return DISAS_NORETURN;
+    }
+    gen_helper_lexb(o->out, cpu_env, o->in1, o->in2, m34);
+    tcg_temp_free_i32(m34);
     return DISAS_NEXT;
 }
 
@@ -2708,6 +2853,12 @@ static DisasJumpType op_lxeb(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static DisasJumpType op_lde(DisasContext *s, DisasOps *o)
+{
+    tcg_gen_shli_i64(o->out, o->in2, 32);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_llgt(DisasContext *s, DisasOps *o)
 {
     tcg_gen_andi_i64(o->out, o->in2, 0x7fffffff);
@@ -3119,6 +3270,23 @@ static DisasJumpType op_lzrb(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static DisasJumpType op_lcbb(DisasContext *s, DisasOps *o)
+{
+    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
+
+    if (get_field(s->fields, m3) > 6) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    tcg_gen_ori_i64(o->addr1, o->addr1, -block_size);
+    tcg_gen_neg_i64(o->addr1, o->addr1);
+    tcg_gen_movi_i64(o->out, 16);
+    tcg_gen_umin_i64(o->out, o->out, o->addr1);
+    gen_op_update1_cc_i64(s, CC_OP_LCBB, o->out);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_mov2(DisasContext *s, DisasOps *o)
 {
     o->out = o->in2;
@@ -3955,41 +4123,33 @@ static DisasJumpType op_sfas(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_srnm(DisasContext *s, DisasOps *o)
 {
-    int b2 = get_field(s->fields, b2);
-    int d2 = get_field(s->fields, d2);
-    TCGv_i64 t1 = tcg_temp_new_i64();
-    TCGv_i64 t2 = tcg_temp_new_i64();
-    int mask, pos, len;
+    /* Bits other than 62 and 63 are ignored. Bit 29 is set to zero. */
+    tcg_gen_andi_i64(o->addr1, o->addr1, 0x3ull);
+    gen_helper_srnm(cpu_env, o->addr1);
+    return DISAS_NEXT;
+}
 
-    switch (s->fields->op2) {
-    case 0x99: /* SRNM */
-        pos = 0, len = 2;
-        break;
-    case 0xb8: /* SRNMB */
-        pos = 0, len = 3;
-        break;
-    case 0xb9: /* SRNMT */
-        pos = 4, len = 3;
-        break;
-    default:
-        tcg_abort();
-    }
-    mask = (1 << len) - 1;
+static DisasJumpType op_srnmb(DisasContext *s, DisasOps *o)
+{
+    /* Bits 0-55 are are ignored. */
+    tcg_gen_andi_i64(o->addr1, o->addr1, 0xffull);
+    gen_helper_srnm(cpu_env, o->addr1);
+    return DISAS_NEXT;
+}
 
-    /* Insert the value into the appropriate field of the FPC.  */
-    if (b2 == 0) {
-        tcg_gen_movi_i64(t1, d2 & mask);
-    } else {
-        tcg_gen_addi_i64(t1, regs[b2], d2);
-        tcg_gen_andi_i64(t1, t1, mask);
-    }
-    tcg_gen_ld32u_i64(t2, cpu_env, offsetof(CPUS390XState, fpc));
-    tcg_gen_deposit_i64(t2, t2, t1, pos, len);
-    tcg_temp_free_i64(t1);
+static DisasJumpType op_srnmt(DisasContext *s, DisasOps *o)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
 
-    /* Then install the new FPC to set the rounding mode in fpu_status.  */
-    gen_helper_sfpc(cpu_env, t2);
-    tcg_temp_free_i64(t2);
+    /* Bits other than 61-63 are ignored. */
+    tcg_gen_andi_i64(o->addr1, o->addr1, 0x7ull);
+
+    /* No need to call a helper, we don't implement dfp */
+    tcg_gen_ld32u_i64(tmp, cpu_env, offsetof(CPUS390XState, fpc));
+    tcg_gen_deposit_i64(tmp, tmp, o->addr1, 4, 3);
+    tcg_gen_st32_i64(tmp, cpu_env, offsetof(CPUS390XState, fpc));
+
+    tcg_temp_free_i64(tmp);
     return DISAS_NEXT;
 }
 
@@ -5908,6 +6068,7 @@ enum DisasInsnEnum {
 #define FAC_ECT         S390_FEAT_EXTRACT_CPU_TIME
 #define FAC_PCI         S390_FEAT_ZPCI /* z/PCI facility */
 #define FAC_AIS         S390_FEAT_ADAPTER_INT_SUPPRESSION
+#define FAC_V           S390_FEAT_VECTOR /* vector facility */
 
 static const DisasInsn insn_info[] = {
 #include "insn-data.def"
@@ -6091,7 +6252,7 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
     const DisasInsn *insn;
     DisasJumpType ret = DISAS_NEXT;
     DisasFields f;
-    DisasOps o;
+    DisasOps o = {};
 
     /* Search for the insn in the table.  */
     insn = extract_insn(env, s, &f);
@@ -6161,12 +6322,6 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
     /* Set up the strutures we use to communicate with the helpers. */
     s->insn = insn;
     s->fields = &f;
-    o.g_out = o.g_out2 = o.g_in1 = o.g_in2 = false;
-    o.out = NULL;
-    o.out2 = NULL;
-    o.in1 = NULL;
-    o.in2 = NULL;
-    o.addr1 = NULL;
 
     /* Implement the instruction.  */
     if (insn->help_in1) {