summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--linux-user/i386/signal.c667
-rw-r--r--target/i386/cpu.h57
-rw-r--r--target/i386/tcg/access.c169
-rw-r--r--target/i386/tcg/access.h40
-rw-r--r--target/i386/tcg/fpu_helper.c553
-rw-r--r--target/i386/tcg/meson.build1
-rw-r--r--tests/tcg/x86_64/Makefile.target1
-rw-r--r--tests/tcg/x86_64/test-1648.c33
8 files changed, 1018 insertions, 503 deletions
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 990048f42a..cb90711834 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -34,15 +34,22 @@ struct target_fpreg {
     uint16_t exponent;
 };
 
-struct target_fpxreg {
-    uint16_t significand[4];
-    uint16_t exponent;
-    uint16_t padding[3];
-};
+/* Legacy x87 fpu state format for FSAVE/FRESTOR. */
+struct target_fregs_state {
+    uint32_t cwd;
+    uint32_t swd;
+    uint32_t twd;
+    uint32_t fip;
+    uint32_t fcs;
+    uint32_t foo;
+    uint32_t fos;
+    struct target_fpreg st[8];
 
-struct target_xmmreg {
-    uint32_t element[4];
+    /* Software status information [not touched by FSAVE]. */
+    uint16_t status;
+    uint16_t magic;   /* 0xffff: FPU data only, 0x0000: FXSR FPU data */
 };
+QEMU_BUILD_BUG_ON(sizeof(struct target_fregs_state) != 32 + 80);
 
 struct target_fpx_sw_bytes {
     uint32_t magic1;
@@ -53,55 +60,11 @@ struct target_fpx_sw_bytes {
 };
 QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4);
 
-struct target_fpstate_fxsave {
-    /* FXSAVE format */
-    uint16_t cw;
-    uint16_t sw;
-    uint16_t twd;
-    uint16_t fop;
-    uint64_t rip;
-    uint64_t rdp;
-    uint32_t mxcsr;
-    uint32_t mxcsr_mask;
-    uint32_t st_space[32];
-    uint32_t xmm_space[64];
-    uint32_t hw_reserved[12];
-    struct target_fpx_sw_bytes sw_reserved;
-    uint8_t xfeatures[];
-};
-#define TARGET_FXSAVE_SIZE   sizeof(struct target_fpstate_fxsave)
-QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512);
-QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_fxsave, sw_reserved) != 464);
-
 struct target_fpstate_32 {
-    /* Regular FPU environment */
-    uint32_t cw;
-    uint32_t sw;
-    uint32_t tag;
-    uint32_t ipoff;
-    uint32_t cssel;
-    uint32_t dataoff;
-    uint32_t datasel;
-    struct target_fpreg st[8];
-    uint16_t  status;
-    uint16_t  magic;          /* 0xffff = regular FPU data only */
-    struct target_fpstate_fxsave fxsave;
+    struct target_fregs_state fpstate;
+    X86LegacyXSaveArea fxstate;
 };
 
-/*
- * For simplicity, setup_frame aligns struct target_fpstate_32 to
- * 16 bytes, so ensure that the FXSAVE area is also aligned.
- */
-QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15);
-
-#ifndef TARGET_X86_64
-# define target_fpstate target_fpstate_32
-# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave)
-#else
-# define target_fpstate target_fpstate_fxsave
-# define TARGET_FPSTATE_FXSAVE_OFFSET 0
-#endif
-
 struct target_sigcontext_32 {
     uint16_t gs, __gsh;
     uint16_t fs, __fsh;
@@ -184,24 +147,16 @@ struct sigframe {
     int sig;
     struct target_sigcontext sc;
     /*
-     * The actual fpstate is placed after retcode[] below, to make
-     * room for the variable-sized xsave data.  The older unused fpstate
-     * has to be kept to avoid changing the offset of extramask[], which
+     * The actual fpstate is placed after retcode[] below, to make room
+     * for the variable-sized xsave data.  The older unused fpstate has
+     * to be kept to avoid changing the offset of extramask[], which
      * is part of the ABI.
      */
-    struct target_fpstate fpstate_unused;
+    struct target_fpstate_32 fpstate_unused;
     abi_ulong extramask[TARGET_NSIG_WORDS-1];
     char retcode[8];
-
-    /*
-     * This field will be 16-byte aligned in memory.  Applying QEMU_ALIGNED
-     * to it ensures that the base of the frame has an appropriate alignment
-     * too.
-     */
-    struct target_fpstate fpstate QEMU_ALIGNED(8);
+    /* fp state follows here */
 };
-#define TARGET_SIGFRAME_FXSAVE_OFFSET (                                    \
-    offsetof(struct sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET)
 
 struct rt_sigframe {
     abi_ulong pretcode;
@@ -211,10 +166,8 @@ struct rt_sigframe {
     struct target_siginfo info;
     struct target_ucontext uc;
     char retcode[8];
-    struct target_fpstate fpstate QEMU_ALIGNED(8);
+    /* fp state follows here */
 };
-#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET (                                 \
-    offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET)
 
 /*
  * Verify that vdso-asmoffset.h constants match.
@@ -232,64 +185,175 @@ struct rt_sigframe {
     abi_ulong pretcode;
     struct target_ucontext uc;
     struct target_siginfo info;
-    struct target_fpstate fpstate QEMU_ALIGNED(16);
+    /* fp state follows here */
 };
-#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET (                                 \
-    offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET)
 #endif
 
+typedef enum {
+#ifndef TARGET_X86_64
+    FPSTATE_FSAVE,
+#endif
+    FPSTATE_FXSAVE,
+    FPSTATE_XSAVE
+} FPStateKind;
+
+static FPStateKind get_fpstate_kind(CPUX86State *env)
+{
+    if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
+        return FPSTATE_XSAVE;
+    }
+#ifdef TARGET_X86_64
+    return FPSTATE_FXSAVE;
+#else
+    if (env->features[FEAT_1_EDX] & CPUID_FXSR) {
+        return FPSTATE_FXSAVE;
+    }
+    return FPSTATE_FSAVE;
+#endif
+}
+
+static unsigned get_fpstate_size(CPUX86State *env, FPStateKind fpkind)
+{
+    /*
+     * Kernel:
+     *   fpu__alloc_mathframe
+     *     xstate_sigframe_size(current->thread.fpu.fpstate);
+     *       size = fpstate->user_size
+     *       use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size
+     *   where fpstate->user_size is computed at init in
+     *   fpu__init_system_xstate_size_legacy and
+     *   fpu__init_system_xstate.
+     *
+     * Here we have no place to pre-compute, so inline it all.
+     */
+    switch (fpkind) {
+    case FPSTATE_XSAVE:
+        return (xsave_area_size(env->xcr0, false)
+                + TARGET_FP_XSTATE_MAGIC2_SIZE);
+    case FPSTATE_FXSAVE:
+        return sizeof(X86LegacyXSaveArea);
+#ifndef TARGET_X86_64
+    case FPSTATE_FSAVE:
+        return sizeof(struct target_fregs_state);
+#endif
+    }
+    g_assert_not_reached();
+}
+
+static abi_ptr get_sigframe(struct target_sigaction *ka, CPUX86State *env,
+                            unsigned frame_size, FPStateKind fpkind,
+                            abi_ptr *fpstate, abi_ptr *fxstate, abi_ptr *fpend)
+{
+    abi_ptr sp;
+    unsigned math_size;
+
+    /* Default to using normal stack */
+    sp = get_sp_from_cpustate(env);
+#ifdef TARGET_X86_64
+    sp -= 128; /* this is the redzone */
+#endif
+
+    /* This is the X/Open sanctioned signal stack switching.  */
+    if (ka->sa_flags & TARGET_SA_ONSTACK) {
+        sp = target_sigsp(sp, ka);
+    } else {
+#ifndef TARGET_X86_64
+        /* This is the legacy signal stack switching. */
+        if ((env->segs[R_SS].selector & 0xffff) != __USER_DS
+            && !(ka->sa_flags & TARGET_SA_RESTORER)
+            && ka->sa_restorer) {
+            sp = ka->sa_restorer;
+        }
+#endif
+    }
+
+    math_size = get_fpstate_size(env, fpkind);
+    sp = ROUND_DOWN(sp - math_size, 64);
+    *fpend = sp + math_size;
+    *fxstate = sp;
+#ifndef TARGET_X86_64
+    if (fpkind != FPSTATE_FSAVE) {
+        sp -= sizeof(struct target_fregs_state);
+    }
+#endif
+    *fpstate = sp;
+
+    sp -= frame_size;
+    /*
+     * Align the stack pointer according to the ABI, i.e. so that on
+     * function entry ((sp + sizeof(return_addr)) & 15) == 0.
+     */
+    sp += sizeof(target_ulong);
+    sp = ROUND_DOWN(sp, 16);
+    sp -= sizeof(target_ulong);
+
+    return sp;
+}
+
 /*
  * Set up a signal frame.
  */
 
-static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave,
-                             abi_ulong fxsave_addr)
+static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate)
 {
-    if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
-        /* fxsave_addr must be 16 byte aligned for fxsave */
-        assert(!(fxsave_addr & 0xf));
+    struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
 
-        cpu_x86_fxsave(env, fxsave_addr);
-        __put_user(0, &fxsave->sw_reserved.magic1);
-    } else {
-        uint32_t xstate_size = xsave_area_size(env->xcr0, false);
-        uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE;
+    cpu_x86_fxsave(env, fxstate, sizeof(*fxstate));
+    __put_user(0, &sw->magic1);
+}
 
-        /*
-         * extended_size is the offset from fpstate_addr to right after the end
-         * of the extended save states.  On 32-bit that includes the legacy
-         * FSAVE area.
-         */
-        uint32_t extended_size = TARGET_FPSTATE_FXSAVE_OFFSET
-            + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE;
-
-        /* fxsave_addr must be 64 byte aligned for xsave */
-        assert(!(fxsave_addr & 0x3f));
-
-        /* Zero the header, XSAVE *adds* features to an existing save state.  */
-        memset(fxsave->xfeatures, 0, 64);
-        cpu_x86_xsave(env, fxsave_addr);
-        __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1);
-        __put_user(extended_size, &fxsave->sw_reserved.extended_size);
-        __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures);
-        __put_user(xstate_size, &fxsave->sw_reserved.xstate_size);
-        __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *) &fxsave->xfeatures[xfeatures_size]);
-    }
+static void xsave_sigcontext(CPUX86State *env,
+                             X86LegacyXSaveArea *fxstate,
+                             abi_ptr fpstate_addr,
+                             abi_ptr xstate_addr,
+                             abi_ptr fpend_addr)
+{
+    struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
+    /*
+     * extended_size is the offset from fpstate_addr to right after
+     * the end of the extended save states.  On 32-bit that includes
+     * the legacy FSAVE area.
+     */
+    uint32_t extended_size = fpend_addr - fpstate_addr;
+    /* Recover xstate_size by removing magic2. */
+    uint32_t xstate_size = (fpend_addr - xstate_addr
+                            - TARGET_FP_XSTATE_MAGIC2_SIZE);
+    /* magic2 goes just after xstate. */
+    uint32_t *magic2 = (void *)fxstate + xstate_size;
+
+    /* xstate_addr must be 64 byte aligned for xsave */
+    assert(!(xstate_addr & 0x3f));
+
+    /* Zero the header, XSAVE *adds* features to an existing save state.  */
+    memset(fxstate + 1, 0, sizeof(X86XSaveHeader));
+    cpu_x86_xsave(env, fxstate, fpend_addr - xstate_addr, env->xcr0);
+
+    __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1);
+    __put_user(extended_size, &sw->extended_size);
+    __put_user(env->xcr0, &sw->xfeatures);
+    __put_user(xstate_size, &sw->xstate_size);
+    __put_user(TARGET_FP_XSTATE_MAGIC2, magic2);
 }
 
-static void setup_sigcontext(struct target_sigcontext *sc,
-        struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask,
-        abi_ulong fpstate_addr)
+static void setup_sigcontext(CPUX86State *env,
+                             struct target_sigcontext *sc,
+                             abi_ulong mask, FPStateKind fpkind,
+                             struct target_fregs_state *fpstate,
+                             abi_ptr fpstate_addr,
+                             X86LegacyXSaveArea *fxstate,
+                             abi_ptr fxstate_addr,
+                             abi_ptr fpend_addr)
 {
     CPUState *cs = env_cpu(env);
+
 #ifndef TARGET_X86_64
     uint16_t magic;
 
     /* already locked in setup_frame() */
-    __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs);
-    __put_user(env->segs[R_FS].selector, (unsigned int *)&sc->fs);
-    __put_user(env->segs[R_ES].selector, (unsigned int *)&sc->es);
-    __put_user(env->segs[R_DS].selector, (unsigned int *)&sc->ds);
+    __put_user(env->segs[R_GS].selector, (uint32_t *)&sc->gs);
+    __put_user(env->segs[R_FS].selector, (uint32_t *)&sc->fs);
+    __put_user(env->segs[R_ES].selector, (uint32_t *)&sc->es);
+    __put_user(env->segs[R_DS].selector, (uint32_t *)&sc->ds);
     __put_user(env->regs[R_EDI], &sc->edi);
     __put_user(env->regs[R_ESI], &sc->esi);
     __put_user(env->regs[R_EBP], &sc->ebp);
@@ -301,20 +365,14 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     __put_user(cs->exception_index, &sc->trapno);
     __put_user(env->error_code, &sc->err);
     __put_user(env->eip, &sc->eip);
-    __put_user(env->segs[R_CS].selector, (unsigned int *)&sc->cs);
+    __put_user(env->segs[R_CS].selector, (uint32_t *)&sc->cs);
     __put_user(env->eflags, &sc->eflags);
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
-    __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
+    __put_user(env->segs[R_SS].selector, (uint32_t *)&sc->ss);
 
-    cpu_x86_fsave(env, fpstate_addr, 1);
-    fpstate->status = fpstate->sw;
-    if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
-        magic = 0xffff;
-    } else {
-        xsave_sigcontext(env, &fpstate->fxsave,
-                         fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
-        magic = 0;
-    }
+    cpu_x86_fsave(env, fpstate, sizeof(*fpstate));
+    fpstate->status = fpstate->swd;
+    magic = (fpkind == FPSTATE_FSAVE ? 0 : 0xffff);
     __put_user(magic, &fpstate->magic);
 #else
     __put_user(env->regs[R_EDI], &sc->rdi);
@@ -344,57 +402,25 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     __put_user((uint16_t)0, &sc->gs);
     __put_user((uint16_t)0, &sc->fs);
     __put_user(env->segs[R_SS].selector, &sc->ss);
-
-    xsave_sigcontext(env, fpstate, fpstate_addr);
 #endif
 
-    __put_user(fpstate_addr, &sc->fpstate);
+    switch (fpkind) {
+    case FPSTATE_XSAVE:
+        xsave_sigcontext(env, fxstate, fpstate_addr, fxstate_addr, fpend_addr);
+        break;
+    case FPSTATE_FXSAVE:
+        fxsave_sigcontext(env, fxstate);
+        break;
+    default:
+        break;
+    }
 
+    __put_user(fpstate_addr, &sc->fpstate);
     /* non-iBCS2 extensions.. */
     __put_user(mask, &sc->oldmask);
     __put_user(env->cr[2], &sc->cr2);
 }
 
-/*
- * Determine which stack to use..
- */
-
-static inline abi_ulong
-get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset)
-{
-    unsigned long esp;
-
-    /* Default to using normal stack */
-    esp = get_sp_from_cpustate(env);
-#ifdef TARGET_X86_64
-    esp -= 128; /* this is the redzone */
-#endif
-
-    /* This is the X/Open sanctioned signal stack switching.  */
-    if (ka->sa_flags & TARGET_SA_ONSTACK) {
-        esp = target_sigsp(esp, ka);
-    } else {
-#ifndef TARGET_X86_64
-        /* This is the legacy signal stack switching. */
-        if ((env->segs[R_SS].selector & 0xffff) != __USER_DS &&
-                !(ka->sa_flags & TARGET_SA_RESTORER) &&
-                ka->sa_restorer) {
-            esp = (unsigned long) ka->sa_restorer;
-        }
-#endif
-    }
-
-    if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
-        return (esp - (fxsave_offset + TARGET_FXSAVE_SIZE)) & -8ul;
-    } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
-        return ((esp - TARGET_FXSAVE_SIZE) & -16ul) - fxsave_offset;
-    } else {
-        size_t xstate_size =
-               xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE;
-        return ((esp - xstate_size) & -64ul) - fxsave_offset;
-    }
-}
-
 #ifndef TARGET_X86_64
 static void install_sigtramp(void *tramp)
 {
@@ -416,22 +442,36 @@ static void install_rt_sigtramp(void *tramp)
 void setup_frame(int sig, struct target_sigaction *ka,
                  target_sigset_t *set, CPUX86State *env)
 {
-    abi_ulong frame_addr;
+    abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr;
     struct sigframe *frame;
-    int i;
-
-    frame_addr = get_sigframe(ka, env, TARGET_SIGFRAME_FXSAVE_OFFSET);
+    struct target_fregs_state *fpstate;
+    X86LegacyXSaveArea *fxstate;
+    unsigned total_size;
+    FPStateKind fpkind;
+
+    fpkind = get_fpstate_kind(env);
+    frame_addr = get_sigframe(ka, env, sizeof(struct sigframe), fpkind,
+                              &fpstate_addr, &fxstate_addr, &fpend_addr);
     trace_user_setup_frame(env, frame_addr);
 
-    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
-        goto give_sigsegv;
+    total_size = fpend_addr - frame_addr;
+    frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0);
+    if (!frame) {
+        force_sigsegv(sig);
+        return;
+    }
 
-    __put_user(sig, &frame->sig);
+    fxstate = (void *)frame + (fxstate_addr - frame_addr);
+#ifdef TARGET_X86_64
+    fpstate = NULL;
+#else
+    fpstate = (void *)frame + (fpstate_addr - frame_addr);
+#endif
 
-    setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0],
-            frame_addr + offsetof(struct sigframe, fpstate));
+    setup_sigcontext(env, &frame->sc, set->sig[0], fpkind,
+                     fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr);
 
-    for (i = 1; i < TARGET_NSIG_WORDS; i++) {
+    for (int i = 1; i < TARGET_NSIG_WORDS; i++) {
         __put_user(set->sig[i], &frame->extramask[i - 1]);
     }
 
@@ -444,23 +484,24 @@ void setup_frame(int sig, struct target_sigaction *ka,
         install_sigtramp(frame->retcode);
         __put_user(default_sigreturn, &frame->pretcode);
     }
+    unlock_user(frame, frame_addr, total_size);
 
     /* Set up registers for signal handler */
     env->regs[R_ESP] = frame_addr;
     env->eip = ka->_sa_handler;
 
+    /* Store argument for both -mregparm=3 and standard. */
+    env->regs[R_EAX] = sig;
+    __put_user(sig, &frame->sig);
+    /* The kernel clears EDX and ECX even though there is only one arg. */
+    env->regs[R_EDX] = 0;
+    env->regs[R_ECX] = 0;
+
     cpu_x86_load_seg(env, R_DS, __USER_DS);
     cpu_x86_load_seg(env, R_ES, __USER_DS);
     cpu_x86_load_seg(env, R_SS, __USER_DS);
     cpu_x86_load_seg(env, R_CS, __USER_CS);
     env->eflags &= ~TF_MASK;
-
-    unlock_user_struct(frame, frame_addr, 1);
-
-    return;
-
-give_sigsegv:
-    force_sigsegv(sig);
 }
 #endif
 
@@ -469,48 +510,51 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
                     target_siginfo_t *info,
                     target_sigset_t *set, CPUX86State *env)
 {
-    abi_ulong frame_addr;
-#ifndef TARGET_X86_64
-    abi_ulong addr;
-#endif
+    abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr;
     struct rt_sigframe *frame;
-    int i;
-
-    frame_addr = get_sigframe(ka, env, TARGET_RT_SIGFRAME_FXSAVE_OFFSET);
+    X86LegacyXSaveArea *fxstate;
+    struct target_fregs_state *fpstate;
+    unsigned total_size;
+    FPStateKind fpkind;
+
+    fpkind = get_fpstate_kind(env);
+    frame_addr = get_sigframe(ka, env, sizeof(struct rt_sigframe), fpkind,
+                              &fpstate_addr, &fxstate_addr, &fpend_addr);
     trace_user_setup_rt_frame(env, frame_addr);
 
-    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
+    total_size = fpend_addr - frame_addr;
+    frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0);
+    if (!frame) {
         goto give_sigsegv;
+    }
 
-    /* These fields are only in rt_sigframe on 32 bit */
-#ifndef TARGET_X86_64
-    __put_user(sig, &frame->sig);
-    addr = frame_addr + offsetof(struct rt_sigframe, info);
-    __put_user(addr, &frame->pinfo);
-    addr = frame_addr + offsetof(struct rt_sigframe, uc);
-    __put_user(addr, &frame->puc);
-#endif
     if (ka->sa_flags & TARGET_SA_SIGINFO) {
         frame->info = *info;
     }
 
     /* Create the ucontext.  */
-    if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
-        __put_user(1, &frame->uc.tuc_flags);
-    } else {
-        __put_user(0, &frame->uc.tuc_flags);
-    }
+    __put_user(fpkind == FPSTATE_XSAVE, &frame->uc.tuc_flags);
     __put_user(0, &frame->uc.tuc_link);
     target_save_altstack(&frame->uc.tuc_stack, env);
-    setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env,
-            set->sig[0], frame_addr + offsetof(struct rt_sigframe, fpstate));
 
-    for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+    fxstate = (void *)frame + (fxstate_addr - frame_addr);
+#ifdef TARGET_X86_64
+    fpstate = NULL;
+#else
+    fpstate = (void *)frame + (fpstate_addr - frame_addr);
+#endif
+
+    setup_sigcontext(env, &frame->uc.tuc_mcontext, set->sig[0], fpkind,
+                     fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr);
+
+    for (int i = 0; i < TARGET_NSIG_WORDS; i++) {
         __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]);
     }
 
-    /* Set up to return from userspace.  If provided, use a stub
-       already in userspace.  */
+    /*
+     * Set up to return from userspace.  If provided, use a stub
+     * already in userspace.
+     */
     if (ka->sa_flags & TARGET_SA_RESTORER) {
         __put_user(ka->sa_restorer, &frame->pretcode);
     } else {
@@ -529,63 +573,148 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
     env->eip = ka->_sa_handler;
 
 #ifndef TARGET_X86_64
+    /* Store arguments for both -mregparm=3 and standard. */
     env->regs[R_EAX] = sig;
+    __put_user(sig, &frame->sig);
     env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, info);
+    __put_user(env->regs[R_EDX], &frame->pinfo);
     env->regs[R_ECX] = frame_addr + offsetof(struct rt_sigframe, uc);
+    __put_user(env->regs[R_ECX], &frame->puc);
 #else
     env->regs[R_EAX] = 0;
     env->regs[R_EDI] = sig;
     env->regs[R_ESI] = frame_addr + offsetof(struct rt_sigframe, info);
     env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, uc);
 #endif
+    unlock_user(frame, frame_addr, total_size);
 
     cpu_x86_load_seg(env, R_DS, __USER_DS);
     cpu_x86_load_seg(env, R_ES, __USER_DS);
     cpu_x86_load_seg(env, R_CS, __USER_CS);
     cpu_x86_load_seg(env, R_SS, __USER_DS);
     env->eflags &= ~TF_MASK;
-
-    unlock_user_struct(frame, frame_addr, 1);
-
     return;
 
 give_sigsegv:
     force_sigsegv(sig);
 }
 
-static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave,
-                             abi_ulong fxsave_addr)
+/*
+ * Restore a signal frame.
+ */
+
+static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
+                              X86LegacyXSaveArea *fxstate,
+                              abi_ptr fxstate_addr)
+{
+    struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
+    uint32_t magic1, magic2;
+    uint32_t extended_size, xstate_size, min_size, max_size;
+    uint64_t xfeatures;
+    void *xstate;
+    bool ok;
+
+    switch (fpkind) {
+    case FPSTATE_XSAVE:
+        magic1 = tswap32(sw->magic1);
+        extended_size = tswap32(sw->extended_size);
+        xstate_size = tswap32(sw->xstate_size);
+        min_size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
+        max_size = xsave_area_size(env->xcr0, false);
+
+        /* Check for the first magic field and other error scenarios. */
+        if (magic1 != TARGET_FP_XSTATE_MAGIC1 ||
+            xstate_size < min_size ||
+            xstate_size > max_size ||
+            xstate_size > extended_size) {
+            break;
+        }
+
+        /*
+         * Restore the features indicated in the frame, masked by
+         * those currently enabled.  Re-check the frame size.
+         * ??? It is not clear where the kernel does this, but it
+         * is not in check_xstate_in_sigframe, and so (probably)
+         * does not fall back to fxrstor.
+         */
+        xfeatures = tswap64(sw->xfeatures) & env->xcr0;
+        min_size = xsave_area_size(xfeatures, false);
+        if (xstate_size < min_size) {
+            return false;
+        }
+
+        /* Re-lock the entire xstate area, with the extensions and magic. */
+        xstate = lock_user(VERIFY_READ, fxstate_addr,
+                           xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE, 1);
+        if (!xstate) {
+            return false;
+        }
+
+        /*
+         * Check for the presence of second magic word at the end of memory
+         * layout. This detects the case where the user just copied the legacy
+         * fpstate layout with out copying the extended state information
+         * in the memory layout.
+         */
+        magic2 = tswap32(*(uint32_t *)(xstate + xstate_size));
+        if (magic2 != TARGET_FP_XSTATE_MAGIC2) {
+            unlock_user(xstate, fxstate_addr, 0);
+            break;
+        }
+
+        ok = cpu_x86_xrstor(env, xstate, xstate_size, xfeatures);
+        unlock_user(xstate, fxstate_addr, 0);
+        return ok;
+
+    default:
+        break;
+    }
+
+    cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate));
+    return true;
+}
+
+#ifndef TARGET_X86_64
+static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
+                              struct target_fregs_state *fpstate,
+                              abi_ptr fpstate_addr,
+                              X86LegacyXSaveArea *fxstate,
+                              abi_ptr fxstate_addr)
 {
-    if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
-        uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size);
-        uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size);
-        uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE;
-
-        /* Linux checks MAGIC2 using xstate_size, not extended_size.  */
-        if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 &&
-            extended_size >= TARGET_FPSTATE_FXSAVE_OFFSET + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE) {
-            if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr,
-                           extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) {
-                return 1;
-            }
-            if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) {
-                cpu_x86_xrstor(env, fxsave_addr);
-                return 0;
-            }
+    switch (fpkind) {
+    case FPSTATE_XSAVE:
+        if (!xrstor_sigcontext(env, fpkind, fxstate, fxstate_addr)) {
+            return false;
         }
-        /* fall through to fxrstor */
+        break;
+    case FPSTATE_FXSAVE:
+        cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate));
+        break;
+    case FPSTATE_FSAVE:
+        break;
+    default:
+        g_assert_not_reached();
     }
 
-    cpu_x86_fxrstor(env, fxsave_addr);
-    return 0;
+    /*
+     * Copy the legacy state because the FP portion of the FX frame has
+     * to be ignored for histerical raisins.  The kernel folds the two
+     * states together and then performs a single load; here we perform
+     * the merge within ENV by loading XSTATE/FXSTATE first, then
+     * overriding with the FSTATE afterward.
+     */
+    cpu_x86_frstor(env, fpstate, sizeof(*fpstate));
+    return true;
 }
+#endif
 
-static int
-restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
+static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
 {
-    int err = 1;
-    abi_ulong fpstate_addr;
-    unsigned int tmpflags;
+    abi_ptr fpstate_addr;
+    unsigned tmpflags, math_size;
+    FPStateKind fpkind;
+    void *fpstate;
+    bool ok;
 
 #ifndef TARGET_X86_64
     cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
@@ -630,32 +759,34 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
 
     tmpflags = tswapl(sc->eflags);
     env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
-    //          regs->orig_eax = -1;            /* disable syscall checks */
 
     fpstate_addr = tswapl(sc->fpstate);
-    if (fpstate_addr != 0) {
-        struct target_fpstate *fpstate;
-        if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr,
-                              sizeof(struct target_fpstate))) {
-            return err;
-        }
+    if (fpstate_addr == 0) {
+        return true;
+    }
+
+    fpkind = get_fpstate_kind(env);
+    math_size = get_fpstate_size(env, fpkind);
 #ifndef TARGET_X86_64
-        if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
-            cpu_x86_frstor(env, fpstate_addr, 1);
-            err = 0;
-        } else {
-            err = xrstor_sigcontext(env, &fpstate->fxsave,
-                                    fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
-        }
-#else
-        err = xrstor_sigcontext(env, fpstate, fpstate_addr);
+    if (fpkind != FPSTATE_FSAVE) {
+        math_size += sizeof(struct target_fregs_state);
+    }
 #endif
-        unlock_user_struct(fpstate, fpstate_addr, 0);
-    } else {
-        err = 0;
+    fpstate = lock_user(VERIFY_READ, fpstate_addr, math_size, 1);
+    if (!fpstate) {
+        return false;
     }
 
-    return err;
+#ifdef TARGET_X86_64
+    ok = xrstor_sigcontext(env, fpkind, fpstate, fpstate_addr);
+#else
+    ok = frstor_sigcontext(env, fpkind, fpstate, fpstate_addr,
+                           fpstate + sizeof(struct target_fregs_state),
+                           fpstate_addr + sizeof(struct target_fregs_state));
+#endif
+
+    unlock_user(fpstate, fpstate_addr, 0);
+    return ok;
 }
 
 /* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */
@@ -666,29 +797,27 @@ long do_sigreturn(CPUX86State *env)
     abi_ulong frame_addr = env->regs[R_ESP] - 8;
     target_sigset_t target_set;
     sigset_t set;
-    int i;
 
     trace_user_do_sigreturn(env, frame_addr);
-    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
-        goto badframe;
-    /* set blocked signals */
+    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+        force_sig(TARGET_SIGSEGV);
+        return -QEMU_ESIGRETURN;
+    }
+
+    /* Set blocked signals. */
     __get_user(target_set.sig[0], &frame->sc.oldmask);
-    for(i = 1; i < TARGET_NSIG_WORDS; i++) {
+    for (int i = 1; i < TARGET_NSIG_WORDS; i++) {
         __get_user(target_set.sig[i], &frame->extramask[i - 1]);
     }
-
     target_to_host_sigset_internal(&set, &target_set);
     set_sigmask(&set);
 
-    /* restore registers */
-    if (restore_sigcontext(env, &frame->sc))
-        goto badframe;
-    unlock_user_struct(frame, frame_addr, 0);
-    return -QEMU_ESIGRETURN;
+    /* Restore registers */
+    if (!restore_sigcontext(env, &frame->sc)) {
+        force_sig(TARGET_SIGSEGV);
+    }
 
-badframe:
     unlock_user_struct(frame, frame_addr, 0);
-    force_sig(TARGET_SIGSEGV);
     return -QEMU_ESIGRETURN;
 }
 #endif
@@ -706,7 +835,7 @@ long do_rt_sigreturn(CPUX86State *env)
     target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
     set_sigmask(&set);
 
-    if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
+    if (!restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
         goto badframe;
     }
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c500a69a69..c64ef0c1a2 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1425,23 +1425,34 @@ typedef struct {
  */
 #define UNASSIGNED_APIC_ID 0xFFFFFFFF
 
-typedef union X86LegacyXSaveArea {
-    struct {
-        uint16_t fcw;
-        uint16_t fsw;
-        uint8_t ftw;
-        uint8_t reserved;
-        uint16_t fpop;
-        uint64_t fpip;
-        uint64_t fpdp;
-        uint32_t mxcsr;
-        uint32_t mxcsr_mask;
-        FPReg fpregs[8];
-        uint8_t xmm_regs[16][16];
+typedef struct X86LegacyXSaveArea {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t ftw;
+    uint8_t reserved;
+    uint16_t fpop;
+    union {
+        struct {
+            uint64_t fpip;
+            uint64_t fpdp;
+        };
+        struct {
+            uint32_t fip;
+            uint32_t fcs;
+            uint32_t foo;
+            uint32_t fos;
+        };
     };
-    uint8_t data[512];
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    FPReg fpregs[8];
+    uint8_t xmm_regs[16][16];
+    uint32_t hw_reserved[12];
+    uint32_t sw_reserved[12];
 } X86LegacyXSaveArea;
 
+QEMU_BUILD_BUG_ON(sizeof(X86LegacyXSaveArea) != 512);
+
 typedef struct X86XSaveHeader {
     uint64_t xstate_bv;
     uint64_t xcomp_bv;
@@ -2255,15 +2266,17 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
 /* used for debug or cpu save/restore */
 
 /* cpu-exec.c */
-/* the following helpers are only usable in user mode simulation as
-   they can trigger unexpected exceptions */
+/*
+ * The following helpers are only usable in user mode simulation.
+ * The host pointers should come from lock_user().
+ */
 void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr);
-void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr);
-void cpu_x86_xsave(CPUX86State *s, target_ulong ptr);
-void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr);
+void cpu_x86_fsave(CPUX86State *s, void *host, size_t len);
+void cpu_x86_frstor(CPUX86State *s, void *host, size_t len);
+void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len);
+void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len);
+void cpu_x86_xsave(CPUX86State *s, void *host, size_t len, uint64_t rbfm);
+bool cpu_x86_xrstor(CPUX86State *s, void *host, size_t len, uint64_t rbfm);
 
 /* cpu.c */
 void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c
new file mode 100644
index 0000000000..56a1181ea5
--- /dev/null
+++ b/target/i386/tcg/access.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Access guest memory in blocks. */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "access.h"
+
+
+void access_prepare_mmu(X86Access *ret, CPUX86State *env,
+                        vaddr vaddr, unsigned size,
+                        MMUAccessType type, int mmu_idx, uintptr_t ra)
+{
+    int size1, size2;
+    void *haddr1, *haddr2;
+
+    assert(size > 0 && size <= TARGET_PAGE_SIZE);
+
+    size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
+    size2 = size - size1;
+
+    memset(ret, 0, sizeof(*ret));
+    ret->vaddr = vaddr;
+    ret->size = size;
+    ret->size1 = size1;
+    ret->mmu_idx = mmu_idx;
+    ret->env = env;
+    ret->ra = ra;
+
+    haddr1 = probe_access(env, vaddr, size1, type, mmu_idx, ra);
+    ret->haddr1 = haddr1;
+
+    if (unlikely(size2)) {
+        haddr2 = probe_access(env, vaddr + size1, size2, type, mmu_idx, ra);
+        if (haddr2 == haddr1 + size1) {
+            ret->size1 = size;
+        } else {
+#ifdef CONFIG_USER_ONLY
+            g_assert_not_reached();
+#else
+            ret->haddr2 = haddr2;
+#endif
+        }
+    }
+}
+
+void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr,
+                    unsigned size, MMUAccessType type, uintptr_t ra)
+{
+    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
+    access_prepare_mmu(ret, env, vaddr, size, type, mmu_idx, ra);
+}
+
+static void *access_ptr(X86Access *ac, vaddr addr, unsigned len)
+{
+    vaddr offset = addr - ac->vaddr;
+
+    assert(addr >= ac->vaddr);
+
+#ifdef CONFIG_USER_ONLY
+    assert(offset <= ac->size1 - len);
+    return ac->haddr1 + offset;
+#else
+    if (likely(offset <= ac->size1 - len)) {
+        return ac->haddr1 + offset;
+    }
+    assert(offset <= ac->size - len);
+    /*
+     * If the address is not naturally aligned, it might span both pages.
+     * Only return ac->haddr2 if the area is entirely within the second page,
+     * otherwise fall back to slow accesses.
+     */
+    if (likely(offset >= ac->size1)) {
+        return ac->haddr2 + (offset - ac->size1);
+    }
+    return NULL;
+#endif
+}
+
+#ifdef CONFIG_USER_ONLY
+# define test_ptr(p)  true
+#else
+# define test_ptr(p)  likely(p)
+#endif
+
+uint8_t access_ldb(X86Access *ac, vaddr addr)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint8_t));
+
+    if (test_ptr(p)) {
+        return ldub_p(p);
+    }
+    return cpu_ldub_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+uint16_t access_ldw(X86Access *ac, vaddr addr)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint16_t));
+
+    if (test_ptr(p)) {
+        return lduw_le_p(p);
+    }
+    return cpu_lduw_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+uint32_t access_ldl(X86Access *ac, vaddr addr)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint32_t));
+
+    if (test_ptr(p)) {
+        return ldl_le_p(p);
+    }
+    return cpu_ldl_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+uint64_t access_ldq(X86Access *ac, vaddr addr)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint64_t));
+
+    if (test_ptr(p)) {
+        return ldq_le_p(p);
+    }
+    return cpu_ldq_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+void access_stb(X86Access *ac, vaddr addr, uint8_t val)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint8_t));
+
+    if (test_ptr(p)) {
+        stb_p(p, val);
+    } else {
+        cpu_stb_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+    }
+}
+
+void access_stw(X86Access *ac, vaddr addr, uint16_t val)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint16_t));
+
+    if (test_ptr(p)) {
+        stw_le_p(p, val);
+    } else {
+        cpu_stw_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+    }
+}
+
+void access_stl(X86Access *ac, vaddr addr, uint32_t val)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint32_t));
+
+    if (test_ptr(p)) {
+        stl_le_p(p, val);
+    } else {
+        cpu_stl_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+    }
+}
+
+void access_stq(X86Access *ac, vaddr addr, uint64_t val)
+{
+    void *p = access_ptr(ac, addr, sizeof(uint64_t));
+
+    if (test_ptr(p)) {
+        stq_le_p(p, val);
+    } else {
+        cpu_stq_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+    }
+}
diff --git a/target/i386/tcg/access.h b/target/i386/tcg/access.h
new file mode 100644
index 0000000000..d70808a3a3
--- /dev/null
+++ b/target/i386/tcg/access.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Access guest memory in blocks. */
+
+#ifndef X86_TCG_ACCESS_H
+#define X86_TCG_ACCESS_H
+
+/* An access covers at most sizeof(X86XSaveArea), at most 2 pages. */
+typedef struct X86Access {
+    target_ulong vaddr;
+    void *haddr1;
+    void *haddr2;
+    uint16_t size;
+    uint16_t size1;
+    /*
+     * If we can't access the host page directly, we'll have to do I/O access
+     * via ld/st helpers. These are internal details, so we store the rest
+     * to do the access here instead of passing it around in the helpers.
+     */
+    int mmu_idx;
+    CPUX86State *env;
+    uintptr_t ra;
+} X86Access;
+
+void access_prepare_mmu(X86Access *ret, CPUX86State *env,
+                        vaddr vaddr, unsigned size,
+                        MMUAccessType type, int mmu_idx, uintptr_t ra);
+void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr,
+                    unsigned size, MMUAccessType type, uintptr_t ra);
+
+uint8_t  access_ldb(X86Access *ac, vaddr addr);
+uint16_t access_ldw(X86Access *ac, vaddr addr);
+uint32_t access_ldl(X86Access *ac, vaddr addr);
+uint64_t access_ldq(X86Access *ac, vaddr addr);
+
+void access_stb(X86Access *ac, vaddr addr, uint8_t val);
+void access_stw(X86Access *ac, vaddr addr, uint16_t val);
+void access_stl(X86Access *ac, vaddr addr, uint32_t val);
+void access_stq(X86Access *ac, vaddr addr, uint64_t val);
+
+#endif
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 8df8cae631..e322293371 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -27,6 +27,7 @@
 #include "fpu/softfloat.h"
 #include "fpu/softfloat-macros.h"
 #include "helper-tcg.h"
+#include "access.h"
 
 /* float macros */
 #define FT0    (env->ft0)
@@ -84,23 +85,22 @@ static inline void fpop(CPUX86State *env)
     env->fpstt = (env->fpstt + 1) & 7;
 }
 
-static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
+static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
 {
     CPU_LDoubleU temp;
 
-    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
-    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
+    temp.l.lower = access_ldq(ac, ptr);
+    temp.l.upper = access_ldw(ac, ptr + 8);
     return temp.d;
 }
 
-static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
-                    uintptr_t retaddr)
+static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
 {
     CPU_LDoubleU temp;
 
     temp.d = f;
-    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
-    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
+    access_stq(ac, ptr, temp.l.lower);
+    access_stw(ac, ptr + 8, temp.l.upper);
 }
 
 /* x87 FPU helpers */
@@ -382,16 +382,22 @@ int64_t helper_fisttll_ST0(CPUX86State *env)
 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 {
     int new_fpstt;
+    X86Access ac;
+
+    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
 
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
+    env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
 
 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 {
-    do_fstt(env, ST0, ptr, GETPC());
+    X86Access ac;
+
+    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
+    do_fstt(&ac, ptr, ST0);
 }
 
 void helper_fpush(CPUX86State *env)
@@ -769,18 +775,21 @@ void helper_fninit(CPUX86State *env)
 
 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 {
+    X86Access ac;
     floatx80 tmp;
     uint64_t val;
     unsigned int v;
     int i;
 
+    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
+
     val = 0;
     for (i = 8; i >= 0; i--) {
-        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
+        v = access_ldb(&ac, ptr + i);
         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
     }
     tmp = int64_to_floatx80(val, &env->fp_status);
-    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
+    if (access_ldb(&ac, ptr + 9) & 0x80) {
         tmp = floatx80_chs(tmp);
     }
     fpush(env);
@@ -794,7 +803,9 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
     target_ulong mem_ref, mem_end;
     int64_t val;
     CPU_LDoubleU temp;
+    X86Access ac;
 
+    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
     temp.d = ST0;
 
     val = floatx80_to_int64(ST0, &env->fp_status);
@@ -802,20 +813,20 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
     if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
         set_float_exception_flags(float_flag_invalid, &env->fp_status);
         while (mem_ref < ptr + 7) {
-            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
+            access_stb(&ac, mem_ref++, 0);
         }
-        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
-        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
-        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
+        access_stb(&ac, mem_ref++, 0xc0);
+        access_stb(&ac, mem_ref++, 0xff);
+        access_stb(&ac, mem_ref++, 0xff);
         merge_exception_flags(env, old_flags);
         return;
     }
     mem_end = mem_ref + 9;
     if (SIGND(temp)) {
-        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
+        access_stb(&ac, mem_end, 0x80);
         val = -val;
     } else {
-        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
+        access_stb(&ac, mem_end, 0x00);
     }
     while (mem_ref < mem_end) {
         if (val == 0) {
@@ -824,10 +835,10 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
         v = val % 100;
         val = val / 100;
         v = ((v / 10) << 4) | (v % 10);
-        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
+        access_stb(&ac, mem_ref++, v);
     }
     while (mem_ref < mem_end) {
-        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
+        access_stb(&ac, mem_ref++, 0);
     }
     merge_exception_flags(env, old_flags);
 }
@@ -2364,9 +2375,9 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }
 
-static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
-                      uintptr_t retaddr)
+static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
 {
+    CPUX86State *env = ac->env;
     int fpus, fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;
@@ -2393,28 +2404,31 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
     }
     if (data32) {
         /* 32 bit */
-        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
-        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
-        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
-        cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
-        cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
-        cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
-        cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
+        access_stl(ac, ptr, env->fpuc);
+        access_stl(ac, ptr + 4, fpus);
+        access_stl(ac, ptr + 8, fptag);
+        access_stl(ac, ptr + 12, env->fpip); /* fpip */
+        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
+        access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
+        access_stl(ac, ptr + 24, env->fpds); /* fpos */
     } else {
         /* 16 bit */
-        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
-        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
-        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
-        cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
-        cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
-        cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
-        cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
+        access_stw(ac, ptr, env->fpuc);
+        access_stw(ac, ptr + 2, fpus);
+        access_stw(ac, ptr + 4, fptag);
+        access_stw(ac, ptr + 6, env->fpip);
+        access_stw(ac, ptr + 8, env->fpcs);
+        access_stw(ac, ptr + 10, env->fpdp);
+        access_stw(ac, ptr + 12, env->fpds);
     }
 }
 
 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
 {
-    do_fstenv(env, ptr, data32, GETPC());
+    X86Access ac;
+
+    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
+    do_fstenv(&ac, ptr, data32);
 }
 
 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
@@ -2433,20 +2447,15 @@ static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
 #endif
 }
 
-static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
-                      uintptr_t retaddr)
+static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
 {
     int i, fpus, fptag;
+    CPUX86State *env = ac->env;
+
+    cpu_set_fpuc(env, access_ldw(ac, ptr));
+    fpus = access_ldw(ac, ptr + (2 << data32));
+    fptag = access_ldw(ac, ptr + (4 << data32));
 
-    if (data32) {
-        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
-        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
-        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
-    } else {
-        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
-        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
-        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
-    }
     cpu_set_fpus(env, fpus);
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
@@ -2456,21 +2465,22 @@ static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
 
 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
 {
-    do_fldenv(env, ptr, data32, GETPC());
+    X86Access ac;
+
+    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
+    do_fldenv(&ac, ptr, data32);
 }
 
-static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
-                     uintptr_t retaddr)
+static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
 {
-    floatx80 tmp;
-    int i;
+    CPUX86State *env = ac->env;
 
-    do_fstenv(env, ptr, data32, retaddr);
+    do_fstenv(ac, ptr, data32);
+    ptr += 14 << data32;
 
-    ptr += (target_ulong)14 << data32;
-    for (i = 0; i < 8; i++) {
-        tmp = ST(i);
-        do_fstt(env, tmp, ptr, retaddr);
+    for (int i = 0; i < 8; i++) {
+        floatx80 tmp = ST(i);
+        do_fstt(ac, ptr, tmp);
         ptr += 10;
     }
 
@@ -2479,20 +2489,22 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
 
 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
 {
-    do_fsave(env, ptr, data32, GETPC());
+    int size = (14 << data32) + 80;
+    X86Access ac;
+
+    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC());
+    do_fsave(&ac, ptr, data32);
 }
 
-static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
-                      uintptr_t retaddr)
+static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
 {
-    floatx80 tmp;
-    int i;
+    CPUX86State *env = ac->env;
 
-    do_fldenv(env, ptr, data32, retaddr);
-    ptr += (target_ulong)14 << data32;
+    do_fldenv(ac, ptr, data32);
+    ptr += 14 << data32;
 
-    for (i = 0; i < 8; i++) {
-        tmp = do_fldt(env, ptr, retaddr);
+    for (int i = 0; i < 8; i++) {
+        floatx80 tmp = do_fldt(ac, ptr);
         ST(i) = tmp;
         ptr += 10;
     }
@@ -2500,13 +2512,18 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
 
 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
 {
-    do_frstor(env, ptr, data32, GETPC());
+    int size = (14 << data32) + 80;
+    X86Access ac;
+
+    access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC());
+    do_frstor(&ac, ptr, data32);
 }
 
 #define XO(X)  offsetof(X86XSaveArea, X)
 
-static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     int fpus, fptag, i;
     target_ulong addr;
 
@@ -2516,33 +2533,37 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
         fptag |= (env->fptags[i] << i);
     }
 
-    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
-    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
-    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
+    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
+    access_stw(ac, ptr + XO(legacy.fsw), fpus);
+    access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);
 
     /* In 32-bit mode this is eip, sel, dp, sel.
        In 64-bit mode this is rip, rdp.
        But in either case we don't write actual data, just zeros.  */
-    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
-    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
+    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
+    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */
 
     addr = ptr + XO(legacy.fpregs);
+
     for (i = 0; i < 8; i++) {
         floatx80 tmp = ST(i);
-        do_fstt(env, tmp, addr, ra);
+        do_fstt(ac, addr, tmp);
         addr += 16;
     }
 }
 
-static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
+
     update_mxcsr_from_sse_status(env);
-    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
-    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
+    access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
+    access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
 }
 
-static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_sse(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     int i, nb_xmm_regs;
     target_ulong addr;
 
@@ -2554,14 +2575,15 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
 
     addr = ptr + XO(legacy.xmm_regs);
     for (i = 0; i < nb_xmm_regs; i++) {
-        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
-        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
+        access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
+        access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
         addr += 16;
     }
 }
 
-static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     int i, nb_xmm_regs;
 
     if (env->hflags & HF_CS64_MASK) {
@@ -2571,58 +2593,67 @@ static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
     }
 
     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
-        cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra);
-        cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra);
+        access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
+        access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
     }
 }
 
-static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
     int i;
 
     for (i = 0; i < 4; i++, addr += 16) {
-        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
-        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
+        access_stq(ac, addr, env->bnd_regs[i].lb);
+        access_stq(ac, addr + 8, env->bnd_regs[i].ub);
     }
 }
 
-static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
 {
-    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
-                    env->bndcs_regs.cfgu, ra);
-    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
-                    env->bndcs_regs.sts, ra);
+    CPUX86State *env = ac->env;
+
+    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
+               env->bndcs_regs.cfgu);
+    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
+               env->bndcs_regs.sts);
 }
 
-static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
 {
-    cpu_stq_data_ra(env, ptr, env->pkru, ra);
+    access_stq(ac, ptr, ac->env->pkru);
 }
 
-static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_fxsave(X86Access *ac, target_ulong ptr)
 {
-    /* The operand must be 16 byte aligned */
-    if (ptr & 0xf) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    }
-
-    do_xsave_fpu(env, ptr, ra);
+    CPUX86State *env = ac->env;
 
+    do_xsave_fpu(ac, ptr);
     if (env->cr[4] & CR4_OSFXSR_MASK) {
-        do_xsave_mxcsr(env, ptr, ra);
+        do_xsave_mxcsr(ac, ptr);
         /* Fast FXSAVE leaves out the XMM registers */
         if (!(env->efer & MSR_EFER_FFXSR)
             || (env->hflags & HF_CPL_MASK)
             || !(env->hflags & HF_LMA_MASK)) {
-            do_xsave_sse(env, ptr, ra);
+            do_xsave_sse(ac, ptr);
         }
     }
 }
 
 void helper_fxsave(CPUX86State *env, target_ulong ptr)
 {
-    do_fxsave(env, ptr, GETPC());
+    uintptr_t ra = GETPC();
+    X86Access ac;
+
+    /* The operand must be 16 byte aligned */
+    if (ptr & 0xf) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+                   MMU_DATA_STORE, ra);
+    do_fxsave(&ac, ptr);
 }
 
 static uint64_t get_xinuse(CPUX86State *env)
@@ -2639,57 +2670,73 @@ static uint64_t get_xinuse(CPUX86State *env)
     return inuse;
 }
 
-static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
-                     uint64_t inuse, uint64_t opt, uintptr_t ra)
+static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
+                            uint64_t inuse, uint64_t opt)
 {
     uint64_t old_bv, new_bv;
 
-    /* The OS must have enabled XSAVE.  */
-    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
-        raise_exception_ra(env, EXCP06_ILLOP, ra);
-    }
-
-    /* The operand must be 64 byte aligned.  */
-    if (ptr & 63) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    }
-
-    /* Never save anything not enabled by XCR0.  */
-    rfbm &= env->xcr0;
-    opt &= rfbm;
-
     if (opt & XSTATE_FP_MASK) {
-        do_xsave_fpu(env, ptr, ra);
+        do_xsave_fpu(ac, ptr);
     }
     if (rfbm & XSTATE_SSE_MASK) {
         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
-        do_xsave_mxcsr(env, ptr, ra);
+        do_xsave_mxcsr(ac, ptr);
     }
     if (opt & XSTATE_SSE_MASK) {
-        do_xsave_sse(env, ptr, ra);
+        do_xsave_sse(ac, ptr);
     }
     if (opt & XSTATE_YMM_MASK) {
-        do_xsave_ymmh(env, ptr + XO(avx_state), ra);
+        do_xsave_ymmh(ac, ptr + XO(avx_state));
     }
     if (opt & XSTATE_BNDREGS_MASK) {
-        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
+        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
     }
     if (opt & XSTATE_BNDCSR_MASK) {
-        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
+        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
     }
     if (opt & XSTATE_PKRU_MASK) {
-        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
+        do_xsave_pkru(ac, ptr + XO(pkru_state));
     }
 
     /* Update the XSTATE_BV field.  */
-    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
+    old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
-    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
+    access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
+}
+
+static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+{
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, ra);
+    }
+
+    /* The operand must be 64 byte aligned.  */
+    if (ptr & 63) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+}
+
+static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
+                     uint64_t inuse, uint64_t opt, uintptr_t ra)
+{
+    X86Access ac;
+    unsigned size;
+
+    do_xsave_chk(env, ptr, ra);
+
+    /* Never save anything not enabled by XCR0.  */
+    rfbm &= env->xcr0;
+    opt &= rfbm;
+    size = xsave_area_size(opt, false);
+
+    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
+    do_xsave_access(&ac, ptr, rfbm, inuse, opt);
 }
 
 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
 {
-    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
+    do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
 }
 
 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
@@ -2698,36 +2745,41 @@ void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
 }
 
-static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     int i, fpuc, fpus, fptag;
     target_ulong addr;
 
-    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
-    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
-    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
+    fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
+    fpus = access_ldw(ac, ptr + XO(legacy.fsw));
+    fptag = access_ldw(ac, ptr + XO(legacy.ftw));
     cpu_set_fpuc(env, fpuc);
     cpu_set_fpus(env, fpus);
+
     fptag ^= 0xff;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag >> i) & 1);
     }
 
     addr = ptr + XO(legacy.fpregs);
+
     for (i = 0; i < 8; i++) {
-        floatx80 tmp = do_fldt(env, addr, ra);
+        floatx80 tmp = do_fldt(ac, addr);
         ST(i) = tmp;
         addr += 16;
     }
 }
 
-static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
 {
-    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
+    CPUX86State *env = ac->env;
+    cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
 }
 
-static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     int i, nb_xmm_regs;
     target_ulong addr;
 
@@ -2739,8 +2791,8 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
 
     addr = ptr + XO(legacy.xmm_regs);
     for (i = 0; i < nb_xmm_regs; i++) {
-        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
-        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
+        env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
+        env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
         addr += 16;
     }
 }
@@ -2761,8 +2813,9 @@ static void do_clear_sse(CPUX86State *env)
     }
 }
 
-static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     int i, nb_xmm_regs;
 
     if (env->hflags & HF_CS64_MASK) {
@@ -2772,8 +2825,8 @@ static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
     }
 
     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
-        env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra);
-        env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra);
+        env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
+        env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
     }
 }
 
@@ -2793,100 +2846,97 @@ static void do_clear_ymmh(CPUX86State *env)
     }
 }
 
-static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
     int i;
 
     for (i = 0; i < 4; i++, addr += 16) {
-        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
-        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
+        env->bnd_regs[i].lb = access_ldq(ac, addr);
+        env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
     }
 }
 
-static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
 {
+    CPUX86State *env = ac->env;
+
     /* FIXME: Extend highest implemented bit of linear address.  */
     env->bndcs_regs.cfgu
-        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
+        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
     env->bndcs_regs.sts
-        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
+        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
 }
 
-static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
 {
-    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
+    ac->env->pkru = access_ldq(ac, ptr);
 }
 
-static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_fxrstor(X86Access *ac, target_ulong ptr)
 {
-    /* The operand must be 16 byte aligned */
-    if (ptr & 0xf) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    }
-
-    do_xrstor_fpu(env, ptr, ra);
+    CPUX86State *env = ac->env;
 
+    do_xrstor_fpu(ac, ptr);
     if (env->cr[4] & CR4_OSFXSR_MASK) {
-        do_xrstor_mxcsr(env, ptr, ra);
+        do_xrstor_mxcsr(ac, ptr);
         /* Fast FXRSTOR leaves out the XMM registers */
         if (!(env->efer & MSR_EFER_FFXSR)
             || (env->hflags & HF_CPL_MASK)
             || !(env->hflags & HF_LMA_MASK)) {
-            do_xrstor_sse(env, ptr, ra);
+            do_xrstor_sse(ac, ptr);
         }
     }
 }
 
 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
 {
-    do_fxrstor(env, ptr, GETPC());
+    uintptr_t ra = GETPC();
+    X86Access ac;
+
+    /* The operand must be 16 byte aligned */
+    if (ptr & 0xf) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+                   MMU_DATA_LOAD, ra);
+    do_fxrstor(&ac, ptr);
 }
 
-static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
+static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
+                                target_ulong ptr)
 {
     uint64_t xstate_bv, xcomp_bv, reserve0;
 
-    rfbm &= env->xcr0;
+    xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
+    xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
+    reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
+    *pxsbv = xstate_bv;
 
-    /* The OS must have enabled XSAVE.  */
-    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
-        raise_exception_ra(env, EXCP06_ILLOP, ra);
-    }
-
-    /* The operand must be 64 byte aligned.  */
-    if (ptr & 63) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    }
-
-    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
-
-    if ((int64_t)xstate_bv < 0) {
-        /* FIXME: Compact form.  */
-        raise_exception_ra(env, EXCP0D_GPF, ra);
+    /*
+     * XCOMP_BV bit 63 indicates compact form, which we do not support,
+     * and thus must raise #GP.  That leaves us in standard form.
+     * In standard form, bytes 23:8 must be zero -- which is both
+     * XCOMP_BV and the following 64-bit field.
+     */
+    if (xcomp_bv || reserve0) {
+        return false;
     }
 
-    /* Standard form.  */
-
     /* The XSTATE_BV field must not set bits not present in XCR0.  */
-    if (xstate_bv & ~env->xcr0) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    }
+    return (xstate_bv & ~ac->env->xcr0) == 0;
+}
 
-    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
-       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
-       describes only XCOMP_BV, but the description of the standard form
-       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
-       includes the next 64-bit field.  */
-    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
-    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
-    if (xcomp_bv || reserve0) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    }
+static void do_xrstor(X86Access *ac, target_ulong ptr,
+                      uint64_t rfbm, uint64_t xstate_bv)
+{
+    CPUX86State *env = ac->env;
 
     if (rfbm & XSTATE_FP_MASK) {
         if (xstate_bv & XSTATE_FP_MASK) {
-            do_xrstor_fpu(env, ptr, ra);
+            do_xrstor_fpu(ac, ptr);
         } else {
             do_fninit(env);
             memset(env->fpregs, 0, sizeof(env->fpregs));
@@ -2895,23 +2945,23 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
     if (rfbm & XSTATE_SSE_MASK) {
         /* Note that the standard form of XRSTOR loads MXCSR from memory
            whether or not the XSTATE_BV bit is set.  */
-        do_xrstor_mxcsr(env, ptr, ra);
+        do_xrstor_mxcsr(ac, ptr);
         if (xstate_bv & XSTATE_SSE_MASK) {
-            do_xrstor_sse(env, ptr, ra);
+            do_xrstor_sse(ac, ptr);
         } else {
             do_clear_sse(env);
         }
     }
     if (rfbm & XSTATE_YMM_MASK) {
         if (xstate_bv & XSTATE_YMM_MASK) {
-            do_xrstor_ymmh(env, ptr + XO(avx_state), ra);
+            do_xrstor_ymmh(ac, ptr + XO(avx_state));
         } else {
             do_clear_ymmh(env);
         }
     }
     if (rfbm & XSTATE_BNDREGS_MASK) {
         if (xstate_bv & XSTATE_BNDREGS_MASK) {
-            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
+            do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
             env->hflags |= HF_MPX_IU_MASK;
         } else {
             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
@@ -2920,7 +2970,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
     }
     if (rfbm & XSTATE_BNDCSR_MASK) {
         if (xstate_bv & XSTATE_BNDCSR_MASK) {
-            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
+            do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
         } else {
             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
         }
@@ -2929,7 +2979,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
     if (rfbm & XSTATE_PKRU_MASK) {
         uint64_t old_pkru = env->pkru;
         if (xstate_bv & XSTATE_PKRU_MASK) {
-            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
+            do_xrstor_pkru(ac, ptr + XO(pkru_state));
         } else {
             env->pkru = 0;
         }
@@ -2944,38 +2994,117 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
 
 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
 {
-    do_xrstor(env, ptr, rfbm, GETPC());
+    uintptr_t ra = GETPC();
+    X86Access ac;
+    uint64_t xstate_bv;
+    unsigned size, size_ext;
+
+    do_xsave_chk(env, ptr, ra);
+
+    /* Begin with just the minimum size to validate the header. */
+    size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
+    access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
+    if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    rfbm &= env->xcr0;
+    size_ext = xsave_area_size(rfbm & xstate_bv, false);
+    if (size < size_ext) {
+        /* TODO: See if existing page probe has covered extra size. */
+        access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
+    }
+
+    do_xrstor(&ac, ptr, rfbm, xstate_bv);
 }
 
 #if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
 {
-    do_fsave(env, ptr, data32, 0);
+    X86Access ac = {
+        .haddr1 = host,
+        .size = 4 * 7 + 8 * 10,
+        .env = env,
+    };
+
+    assert(ac.size <= len);
+    do_fsave(&ac, 0, true);
 }
 
-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
 {
-    do_frstor(env, ptr, data32, 0);
+    X86Access ac = {
+        .haddr1 = host,
+        .size = 4 * 7 + 8 * 10,
+        .env = env,
+    };
+
+    assert(ac.size <= len);
+    do_frstor(&ac, 0, true);
 }
 
-void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
+void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
 {
-    do_fxsave(env, ptr, 0);
+    X86Access ac = {
+        .haddr1 = host,
+        .size = sizeof(X86LegacyXSaveArea),
+        .env = env,
+    };
+
+    assert(ac.size <= len);
+    do_fxsave(&ac, 0);
 }
 
-void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
+void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
 {
-    do_fxrstor(env, ptr, 0);
+    X86Access ac = {
+        .haddr1 = host,
+        .size = sizeof(X86LegacyXSaveArea),
+        .env = env,
+    };
+
+    assert(ac.size <= len);
+    do_fxrstor(&ac, 0);
 }
 
-void cpu_x86_xsave(CPUX86State *env, target_ulong ptr)
+void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
 {
-    do_xsave(env, ptr, -1, get_xinuse(env), -1, 0);
+    X86Access ac = {
+        .haddr1 = host,
+        .env = env,
+    };
+
+    /*
+     * Since this is only called from user-level signal handling,
+     * we should have done the job correctly there.
+     */
+    assert((rfbm & ~env->xcr0) == 0);
+    ac.size = xsave_area_size(rfbm, false);
+    assert(ac.size <= len);
+    do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
 }
 
-void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr)
+bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
 {
-    do_xrstor(env, ptr, -1, 0);
+    X86Access ac = {
+        .haddr1 = host,
+        .env = env,
+    };
+    uint64_t xstate_bv;
+
+    /*
+     * Since this is only called from user-level signal handling,
+     * we should have done the job correctly there.
+     */
+    assert((rfbm & ~env->xcr0) == 0);
+    ac.size = xsave_area_size(rfbm, false);
+    assert(ac.size <= len);
+
+    if (!valid_xrstor_header(&ac, &xstate_bv, 0)) {
+        return false;
+    }
+    do_xrstor(&ac, 0, rfbm, xstate_bv);
+    return true;
 }
 #endif
 
diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build
index f9110e890c..1105b35d92 100644
--- a/target/i386/tcg/meson.build
+++ b/target/i386/tcg/meson.build
@@ -1,4 +1,5 @@
 i386_ss.add(when: 'CONFIG_TCG', if_true: files(
+  'access.c',
   'bpt_helper.c',
   'cc_helper.c',
   'excp_helper.c',
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index e64aab1b81..5fedf22117 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -13,6 +13,7 @@ X86_64_TESTS += vsyscall
 X86_64_TESTS += noexec
 X86_64_TESTS += cmpxchg
 X86_64_TESTS += adox
+X86_64_TESTS += test-1648
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)
diff --git a/tests/tcg/x86_64/test-1648.c b/tests/tcg/x86_64/test-1648.c
new file mode 100644
index 0000000000..fd0644a8ce
--- /dev/null
+++ b/tests/tcg/x86_64/test-1648.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* See https://gitlab.com/qemu-project/qemu/-/issues/1648 */
+
+#include <signal.h>
+
+__attribute__((noinline))
+void bar(void)
+{
+    /* Success! Continue through sigreturn. */
+}
+
+/*
+ * Because of the change of ABI between foo and bar, the compiler is
+ * required to save XMM6-XMM15.  The compiler will use MOVAPS or MOVDQA,
+ * which will trap if the stack frame is not 16 byte aligned.
+ */
+__attribute__((noinline, ms_abi))
+void foo(void)
+{
+    bar();
+}
+
+void sighandler(int num)
+{
+    foo();
+}
+
+int main(void)
+{
+    signal(SIGUSR1, sighandler);
+    raise(SIGUSR1);
+    return 0;
+}