summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2022-11-01 09:36:13 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2022-11-01 09:36:13 -0400
commit18cd31ff30883707408c7d6d952310189903939e (patch)
treecf0d18ede8d3bd6d0cf1fe38f4a2ab997df77b6b
parent7ef1d48763d6c6e90d1a4e1dc04b1125e77c2aee (diff)
parent83d92559cdf0ce842e52e5bbf230f7f62a6206aa (diff)
downloadfocaccia-qemu-18cd31ff30883707408c7d6d952310189903939e.tar.gz
focaccia-qemu-18cd31ff30883707408c7d6d952310189903939e.zip
Merge tag 'pull-tcg-20221031-2' of https://gitlab.com/rth7680/qemu into staging
Remove sparc32plus support from tcg/sparc.
target/i386: Use cpu_unwind_state_data for tpr access.
target/i386: Expand eflags updates inline
Complete cpu initialization before registration

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmNgQvIdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9bxAf/X6904X+2I55LTMP7
# jLCxMAlSgFiwaWW4sQLvfUS2qjjMNw7dtljF0HaYVJCawABI4tIY3nEYL8dhLiGU
# WpMTmDIY/cBrQ0aMWfUTGRIFZOIpCLsZwiG6zW6w5KxfKUaakeZSgxqhzgFFcM2k
# UDb9HYC6jwEDDZJYTRpcTIsnYHjaiu/ofKjbjWoslq9DIrThLr1UZgoOxzZ9w2Rh
# xEDBNnD42Kzb0Lbc5B1cX4tla43g9KfHkfG6Ww3fJVYZcFxFhAp40y1chtq5qaia
# 64cPOfSdjoHWaZKdXop3hDYvqRTour56S+e1n1VxHVhbsVRh0KKYBvzAZtiN4FEu
# w8E8bA==
# =fxqA
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 31 Oct 2022 17:49:38 EDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20221031-2' of https://gitlab.com/rth7680/qemu:
  tests/tcg/multiarch: Add munmap-pthread.c
  accel/tcg: Complete cpu initialization before registration
  target/i386: Expand eflags updates inline
  accel/tcg: Remove reset_icount argument from cpu_restore_state_from_tb
  accel/tcg: Remove will_exit argument from cpu_restore_state
  target/openrisc: Use cpu_unwind_state_data for mfspr
  target/openrisc: Always exit after mtspr npc
  target/i386: Use cpu_unwind_state_data for tpr access
  accel/tcg: Introduce cpu_unwind_state_data
  tcg/tci: fix logic error when registering helpers via FFI
  tcg/sparc64: Remove sparc32plus constraints
  tcg/sparc64: Rename from tcg/sparc
  tcg/sparc: Remove support for sparc32plus

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r--MAINTAINERS2
-rw-r--r--accel/tcg/cpu-exec-common.c2
-rw-r--r--accel/tcg/cpu-exec.c8
-rw-r--r--accel/tcg/internal.h4
-rw-r--r--accel/tcg/tb-maint.c4
-rw-r--r--accel/tcg/translate-all.c107
-rw-r--r--cpu.c10
-rw-r--r--include/exec/exec-all.h24
-rw-r--r--meson.build4
-rw-r--r--target/alpha/helper.c2
-rw-r--r--target/alpha/mem_helper.c2
-rw-r--r--target/arm/op_helper.c2
-rw-r--r--target/arm/tlb_helper.c8
-rw-r--r--target/cris/helper.c2
-rw-r--r--target/i386/helper.c25
-rw-r--r--target/i386/helper.h5
-rw-r--r--target/i386/tcg/cc_helper.c41
-rw-r--r--target/i386/tcg/sysemu/svm_helper.c2
-rw-r--r--target/i386/tcg/translate.c30
-rw-r--r--target/m68k/op_helper.c4
-rw-r--r--target/microblaze/helper.c2
-rw-r--r--target/nios2/op_helper.c2
-rw-r--r--target/openrisc/sys_helper.c17
-rw-r--r--target/ppc/excp_helper.c2
-rw-r--r--target/s390x/tcg/excp_helper.c2
-rw-r--r--target/tricore/op_helper.c2
-rw-r--r--target/xtensa/helper.c6
-rw-r--r--tcg/sparc64/tcg-target-con-set.h (renamed from tcg/sparc/tcg-target-con-set.h)16
-rw-r--r--tcg/sparc64/tcg-target-con-str.h (renamed from tcg/sparc/tcg-target-con-str.h)3
-rw-r--r--tcg/sparc64/tcg-target.c.inc (renamed from tcg/sparc/tcg-target.c.inc)277
-rw-r--r--tcg/sparc64/tcg-target.h (renamed from tcg/sparc/tcg-target.h)11
-rw-r--r--tcg/tcg.c81
-rw-r--r--tests/tcg/multiarch/Makefile.target3
-rw-r--r--tests/tcg/multiarch/munmap-pthread.c79
34 files changed, 340 insertions, 451 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 223afc2e13..7c560cec0d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3372,7 +3372,7 @@ L: qemu-s390x@nongnu.org
 
 SPARC TCG target
 S: Odd Fixes
-F: tcg/sparc/
+F: tcg/sparc64/
 F: disas/sparc.c
 
 TCI TCG target
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index be6fe45aa5..c7bc8c6efa 100644
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -71,7 +71,7 @@ void cpu_loop_exit(CPUState *cpu)
 void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
 {
     if (pc) {
-        cpu_restore_state(cpu, pc, true);
+        cpu_restore_state(cpu, pc);
     }
     cpu_loop_exit(cpu);
 }
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 82b06c1824..356fe348de 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1052,23 +1052,25 @@ void tcg_exec_realizefn(CPUState *cpu, Error **errp)
         cc->tcg_ops->initialize();
         tcg_target_initialized = true;
     }
-    tlb_init(cpu);
-    qemu_plugin_vcpu_init_hook(cpu);
 
+    cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
+    tlb_init(cpu);
 #ifndef CONFIG_USER_ONLY
     tcg_iommu_init_notifier_list(cpu);
 #endif /* !CONFIG_USER_ONLY */
+    /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */
 }
 
 /* undo the initializations in reverse order */
 void tcg_exec_unrealizefn(CPUState *cpu)
 {
+    qemu_plugin_vcpu_exit_hook(cpu);
 #ifndef CONFIG_USER_ONLY
     tcg_iommu_free_notifier_list(cpu);
 #endif /* !CONFIG_USER_ONLY */
 
-    qemu_plugin_vcpu_exit_hook(cpu);
     tlb_destroy(cpu);
+    g_free(cpu->tb_jmp_cache);
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 1227bb69bd..cb13bade4f 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -106,8 +106,8 @@ void tb_reset_jump(TranslationBlock *tb, int n);
 TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                                tb_page_addr_t phys_page2);
 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
-int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
-                              uintptr_t searched_pc, bool reset_icount);
+void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
+                               uintptr_t host_pc);
 
 /* Return the current PC from CPU, which may be cached in TB. */
 static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index c8e921089d..0cdb35548c 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -536,7 +536,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
                  * restore the CPU state.
                  */
                 current_tb_modified = true;
-                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
+                cpu_restore_state_from_tb(cpu, current_tb, retaddr);
             }
 #endif /* TARGET_HAS_PRECISE_SMC */
             tb_phys_invalidate__locked(tb);
@@ -685,7 +685,7 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
              * function to partially restore the CPU state.
              */
             current_tb_modified = true;
-            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
+            cpu_restore_state_from_tb(cpu, current_tb, pc);
         }
 #endif /* TARGET_HAS_PRECISE_SMC */
         tb_phys_invalidate(tb, addr);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index f185356a36..921944a5ab 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -247,52 +247,65 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
     return p - block;
 }
 
-/* The cpu state corresponding to 'searched_pc' is restored.
- * When reset_icount is true, current TB will be interrupted and
- * icount should be recalculated.
- */
-int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
-                              uintptr_t searched_pc, bool reset_icount)
+static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
+                                   uint64_t *data)
 {
-    uint64_t data[TARGET_INSN_START_WORDS];
-    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
+    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
     const uint8_t *p = tb->tc.ptr + tb->tc.size;
     int i, j, num_insns = tb->icount;
-#ifdef CONFIG_PROFILER
-    TCGProfile *prof = &tcg_ctx->prof;
-    int64_t ti = profile_getclock();
-#endif
 
-    searched_pc -= GETPC_ADJ;
+    host_pc -= GETPC_ADJ;
 
-    if (searched_pc < host_pc) {
+    if (host_pc < iter_pc) {
         return -1;
     }
 
-    memset(data, 0, sizeof(data));
+    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
     if (!TARGET_TB_PCREL) {
         data[0] = tb_pc(tb);
     }
 
-    /* Reconstruct the stored insn data while looking for the point at
-       which the end of the insn exceeds the searched_pc.  */
+    /*
+     * Reconstruct the stored insn data while looking for the point
+     * at which the end of the insn exceeds host_pc.
+     */
     for (i = 0; i < num_insns; ++i) {
         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
             data[j] += decode_sleb128(&p);
         }
-        host_pc += decode_sleb128(&p);
-        if (host_pc > searched_pc) {
-            goto found;
+        iter_pc += decode_sleb128(&p);
+        if (iter_pc > host_pc) {
+            return num_insns - i;
         }
     }
     return -1;
+}
+
+/*
+ * The cpu state corresponding to 'host_pc' is restored in
+ * preparation for exiting the TB.
+ */
+void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
+                               uintptr_t host_pc)
+{
+    uint64_t data[TARGET_INSN_START_WORDS];
+#ifdef CONFIG_PROFILER
+    TCGProfile *prof = &tcg_ctx->prof;
+    int64_t ti = profile_getclock();
+#endif
+    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
+
+    if (insns_left < 0) {
+        return;
+    }
 
- found:
-    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
+    if (tb_cflags(tb) & CF_USE_ICOUNT) {
         assert(icount_enabled());
-        /* Reset the cycle counter to the start of the block
-           and shift if to the number of actually executed instructions */
-        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
+        /*
+         * Reset the cycle counter to the start of the block and
+         * shift if to the number of actually executed instructions.
+         */
+        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
     }
 
     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
@@ -302,20 +315,11 @@ int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                 prof->restore_time + profile_getclock() - ti);
     qatomic_set(&prof->restore_count, prof->restore_count + 1);
 #endif
-    return 0;
 }
 
-bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
 {
     /*
-     * The pc update associated with restore without exit will
-     * break the relative pc adjustments performed by TARGET_TB_PCREL.
-     */
-    if (TARGET_TB_PCREL) {
-        assert(will_exit);
-    }
-
-    /*
      * The host_pc has to be in the rx region of the code buffer.
      * If it is not we will not be able to resolve it here.
      * The two cases where host_pc will not be correct are:
@@ -328,13 +332,24 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
         TranslationBlock *tb = tcg_tb_lookup(host_pc);
         if (tb) {
-            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
+            cpu_restore_state_from_tb(cpu, tb, host_pc);
             return true;
         }
     }
     return false;
 }
 
+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
+{
+    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
+        TranslationBlock *tb = tcg_tb_lookup(host_pc);
+        if (tb) {
+            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
+        }
+    }
+    return false;
+}
+
 void page_init(void)
 {
     page_size_init();
@@ -1016,7 +1031,7 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
     tb = tcg_tb_lookup(retaddr);
     if (tb) {
         /* We can use retranslation to find the PC.  */
-        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
+        cpu_restore_state_from_tb(cpu, tb, retaddr);
         tb_phys_invalidate(tb, -1);
     } else {
         /* The exception probably happened in a helper.  The CPU state should
@@ -1052,7 +1067,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                   (void *)retaddr);
     }
-    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
+    cpu_restore_state_from_tb(cpu, tb, retaddr);
 
     /*
      * Some guests must re-execute the branch when re-executing a delay
@@ -1565,15 +1580,13 @@ void tcg_flush_jmp_cache(CPUState *cpu)
 {
     CPUJumpCache *jc = cpu->tb_jmp_cache;
 
-    if (likely(jc)) {
-        for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-            qatomic_set(&jc->array[i].tb, NULL);
-        }
-    } else {
-        /* This should happen once during realize, and thus never race. */
-        jc = g_new0(CPUJumpCache, 1);
-        jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
-        assert(jc == NULL);
+    /* During early initialization, the cache may not yet be allocated. */
+    if (unlikely(jc == NULL)) {
+        return;
+    }
+
+    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
+        qatomic_set(&jc->array[i].tb, NULL);
     }
 }
 
diff --git a/cpu.c b/cpu.c
index 2a09b05205..4a7d865427 100644
--- a/cpu.c
+++ b/cpu.c
@@ -134,15 +134,23 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
     /* cache the cpu class for the hotpath */
     cpu->cc = CPU_GET_CLASS(cpu);
 
-    cpu_list_add(cpu);
     if (!accel_cpu_realizefn(cpu, errp)) {
         return;
     }
+
     /* NB: errp parameter is unused currently */
     if (tcg_enabled()) {
         tcg_exec_realizefn(cpu, errp);
     }
 
+    /* Wait until cpu initialization complete before exposing cpu. */
+    cpu_list_add(cpu);
+
+    /* Plugin initialization must wait until cpu_index assigned. */
+    if (tcg_enabled()) {
+        qemu_plugin_vcpu_init_hook(cpu);
+    }
+
 #ifdef CONFIG_USER_ONLY
     assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
            qdev_get_vmsd(DEVICE(cpu))->unmigratable);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e948992a80..9b7bfbf09a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -40,19 +40,29 @@ typedef ram_addr_t tb_page_addr_t;
 #endif
 
 /**
+ * cpu_unwind_state_data:
+ * @cpu: the cpu context
+ * @host_pc: the host pc within the translation
+ * @data: output data
+ *
+ * Attempt to load the the unwind state for a host pc occurring in
+ * translated code.  If @host_pc is not in translated code, the
+ * function returns false; otherwise @data is loaded.
+ * This is the same unwind info as given to restore_state_to_opc.
+ */
+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data);
+
+/**
  * cpu_restore_state:
- * @cpu: the vCPU state is to be restore to
- * @searched_pc: the host PC the fault occurred at
- * @will_exit: true if the TB executed will be interrupted after some
-               cpu adjustments. Required for maintaining the correct
-               icount valus
+ * @cpu: the cpu context
+ * @host_pc: the host pc within the translation
  * @return: true if state was restored, false otherwise
  *
  * Attempt to restore the state for a fault occurring in translated
- * code. If the searched_pc is not in translated code no state is
+ * code. If @host_pc is not in translated code no state is
  * restored and the function returns false.
  */
-bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit);
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc);
 
 G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu);
 G_NORETURN void cpu_loop_exit(CPUState *cpu);
diff --git a/meson.build b/meson.build
index 1c1afcc9b8..d809d51791 100644
--- a/meson.build
+++ b/meson.build
@@ -49,7 +49,7 @@ qapi_trace_events = []
 bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin']
 supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux']
 supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64',
-  'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc', 'sparc64']
+  'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64']
 
 cpu = host_machine.cpu_family()
 
@@ -469,8 +469,6 @@ if get_option('tcg').allowed()
   endif
   if get_option('tcg_interpreter')
     tcg_arch = 'tci'
-  elif host_arch == 'sparc64'
-    tcg_arch = 'sparc'
   elif host_arch == 'x86_64'
     tcg_arch = 'i386'
   elif host_arch == 'ppc64'
diff --git a/target/alpha/helper.c b/target/alpha/helper.c
index a5a389b5a3..970c869771 100644
--- a/target/alpha/helper.c
+++ b/target/alpha/helper.c
@@ -532,7 +532,7 @@ G_NORETURN void dynamic_excp(CPUAlphaState *env, uintptr_t retaddr,
     cs->exception_index = excp;
     env->error_code = error;
     if (retaddr) {
-        cpu_restore_state(cs, retaddr, true);
+        cpu_restore_state(cs, retaddr);
         /* Floating-point exceptions (our only users) point to the next PC.  */
         env->pc += 4;
     }
diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c
index 47283a0612..a39b52c5dd 100644
--- a/target/alpha/mem_helper.c
+++ b/target/alpha/mem_helper.c
@@ -28,7 +28,7 @@ static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retadd
     uint64_t pc;
     uint32_t insn;
 
-    cpu_restore_state(env_cpu(env), retaddr, true);
+    cpu_restore_state(env_cpu(env), retaddr);
 
     pc = env->pc;
     insn = cpu_ldl_code(env, pc);
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index c5bde1cfcc..70672bcd9f 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -78,7 +78,7 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
      * we must restore CPU state here before setting the syndrome
      * the caller passed us, and cannot use cpu_loop_exit_restore().
      */
-    cpu_restore_state(cs, ra, true);
+    cpu_restore_state(cs, ra);
     raise_exception(env, excp, syndrome, target_el);
 }
 
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 69b0dc69df..0f4f4fc809 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -156,7 +156,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
     ARMMMUFaultInfo fi = {};
 
     /* now we have a real cpu fault */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
 
     fi.type = ARMFault_Alignment;
     arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
@@ -196,7 +196,7 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
     ARMMMUFaultInfo fi = {};
 
     /* now we have a real cpu fault */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
 
     fi.ea = arm_extabort_type(response);
     fi.type = ARMFault_SyncExternal;
@@ -252,7 +252,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
         return false;
     } else {
         /* now we have a real cpu fault */
-        cpu_restore_state(cs, retaddr, true);
+        cpu_restore_state(cs, retaddr);
         arm_deliver_fault(cpu, address, access_type, mmu_idx, fi);
     }
 }
@@ -271,7 +271,7 @@ void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr,
      * We report both ESR and FAR to signal handlers.
      * For now, it's easiest to deliver the fault normally.
      */
-    cpu_restore_state(cs, ra, true);
+    cpu_restore_state(cs, ra);
     arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi);
 }
 
diff --git a/target/cris/helper.c b/target/cris/helper.c
index 91e4aeb178..81a72699b5 100644
--- a/target/cris/helper.c
+++ b/target/cris/helper.c
@@ -87,7 +87,7 @@ bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     cs->exception_index = EXCP_BUSFAULT;
     env->fault_vector = res.bf_vec;
     if (retaddr) {
-        if (cpu_restore_state(cs, retaddr, true)) {
+        if (cpu_restore_state(cs, retaddr)) {
             /* Evaluate flags after retranslation. */
             helper_top_evaluate_flags(env);
         }
diff --git a/target/i386/helper.c b/target/i386/helper.c
index b62a1e48e2..0ac2da066d 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -509,6 +509,27 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
     }
 }
 
+static inline target_ulong get_memio_eip(CPUX86State *env)
+{
+#ifdef CONFIG_TCG
+    uint64_t data[TARGET_INSN_START_WORDS];
+    CPUState *cs = env_cpu(env);
+
+    if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) {
+        return env->eip;
+    }
+
+    /* Per x86_restore_state_to_opc. */
+    if (TARGET_TB_PCREL) {
+        return (env->eip & TARGET_PAGE_MASK) | data[0];
+    } else {
+        return data[0] - env->segs[R_CS].base;
+    }
+#else
+    qemu_build_not_reached();
+#endif
+}
+
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
 {
     X86CPU *cpu = env_archcpu(env);
@@ -519,9 +540,9 @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
 
         cpu_interrupt(cs, CPU_INTERRUPT_TPR);
     } else if (tcg_enabled()) {
-        cpu_restore_state(cs, cs->mem_io_pc, false);
+        target_ulong eip = get_memio_eip(env);
 
-        apic_handle_tpr_access_report(cpu->apic_state, env->eip, access);
+        apic_handle_tpr_access_report(cpu->apic_state, eip, access);
     }
 }
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target/i386/helper.h b/target/i386/helper.h
index 88143b2a24..b7de5429ef 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -56,13 +56,8 @@ DEF_HELPER_2(syscall, void, env, int)
 DEF_HELPER_2(sysret, void, env, int)
 #endif
 DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
-DEF_HELPER_1(reset_rf, void, env)
 DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
-DEF_HELPER_1(cli, void, env)
-DEF_HELPER_1(sti, void, env)
-DEF_HELPER_1(clac, void, env)
-DEF_HELPER_1(stac, void, env)
 DEF_HELPER_3(boundw, void, env, tl, int)
 DEF_HELPER_3(boundl, void, env, tl, int)
 
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index cc7ea9e8b9..6227dbb30b 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -346,44 +346,3 @@ void helper_clts(CPUX86State *env)
     env->cr[0] &= ~CR0_TS_MASK;
     env->hflags &= ~HF_TS_MASK;
 }
-
-void helper_reset_rf(CPUX86State *env)
-{
-    env->eflags &= ~RF_MASK;
-}
-
-void helper_cli(CPUX86State *env)
-{
-    env->eflags &= ~IF_MASK;
-}
-
-void helper_sti(CPUX86State *env)
-{
-    env->eflags |= IF_MASK;
-}
-
-void helper_clac(CPUX86State *env)
-{
-    env->eflags &= ~AC_MASK;
-}
-
-void helper_stac(CPUX86State *env)
-{
-    env->eflags |= AC_MASK;
-}
-
-#if 0
-/* vm86plus instructions */
-void helper_cli_vm(CPUX86State *env)
-{
-    env->eflags &= ~VIF_MASK;
-}
-
-void helper_sti_vm(CPUX86State *env)
-{
-    env->eflags |= VIF_MASK;
-    if (env->eflags & VIP_MASK) {
-        raise_exception_ra(env, EXCP0D_GPF, GETPC());
-    }
-}
-#endif
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 8e88567399..2d27731b60 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -704,7 +704,7 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
 {
     CPUState *cs = env_cpu(env);
 
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016"
                   PRIx64 ", " TARGET_FMT_lx ")!\n",
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 546c427c23..0ee548ce56 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2746,6 +2746,26 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask)
     }
 }
 
+static void gen_set_eflags(DisasContext *s, target_ulong mask)
+{
+    TCGv t = tcg_temp_new();
+
+    tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_gen_ori_tl(t, t, mask);
+    tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_temp_free(t);
+}
+
+static void gen_reset_eflags(DisasContext *s, target_ulong mask)
+{
+    TCGv t = tcg_temp_new();
+
+    tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_gen_andi_tl(t, t, ~mask);
+    tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_temp_free(t);
+}
+
 /* Clear BND registers during legacy branches.  */
 static void gen_bnd_jmp(DisasContext *s)
 {
@@ -2776,7 +2796,7 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
     }
 
     if (s->base.tb->flags & HF_RF_MASK) {
-        gen_helper_reset_rf(cpu_env);
+        gen_reset_eflags(s, RF_MASK);
     }
     if (recheck_tf) {
         gen_helper_rechecking_single_step(cpu_env);
@@ -5502,12 +5522,12 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
 #endif
     case 0xfa: /* cli */
         if (check_iopl(s)) {
-            gen_helper_cli(cpu_env);
+            gen_reset_eflags(s, IF_MASK);
         }
         break;
     case 0xfb: /* sti */
         if (check_iopl(s)) {
-            gen_helper_sti(cpu_env);
+            gen_set_eflags(s, IF_MASK);
             /* interruptions are enabled only the first insn after sti */
             gen_update_eip_next(s);
             gen_eob_inhibit_irq(s, true);
@@ -5789,7 +5809,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
                 || CPL(s) != 0) {
                 goto illegal_op;
             }
-            gen_helper_clac(cpu_env);
+            gen_reset_eflags(s, AC_MASK);
             s->base.is_jmp = DISAS_EOB_NEXT;
             break;
 
@@ -5798,7 +5818,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
                 || CPL(s) != 0) {
                 goto illegal_op;
             }
-            gen_helper_stac(cpu_env);
+            gen_set_eflags(s, AC_MASK);
             s->base.is_jmp = DISAS_EOB_NEXT;
             break;
 
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
index 5da176d642..1ce850bbc5 100644
--- a/target/m68k/op_helper.c
+++ b/target/m68k/op_helper.c
@@ -460,7 +460,7 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
     M68kCPU *cpu = M68K_CPU(cs);
     CPUM68KState *env = &cpu->env;
 
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
 
     if (m68k_feature(env, M68K_FEATURE_M68040)) {
         env->mmu.mmusr = 0;
@@ -558,7 +558,7 @@ raise_exception_format2(CPUM68KState *env, int tt, int ilen, uintptr_t raddr)
     cs->exception_index = tt;
 
     /* Recover PC and CC_OP for the beginning of the insn.  */
-    cpu_restore_state(cs, raddr, true);
+    cpu_restore_state(cs, raddr);
 
     /* Flags are current in env->cc_*, or are undefined. */
     env->cc_op = CC_OP_FLAGS;
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index a607fe68e5..98bdb82de8 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -277,7 +277,7 @@ void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
     uint32_t esr, iflags;
 
     /* Recover the pc and iflags from the corresponding insn_start.  */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     iflags = cpu->env.iflags;
 
     qemu_log_mask(CPU_LOG_INT,
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index 2e30d0a908..0aaf33ffc2 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -40,7 +40,7 @@ void nios2_cpu_loop_exit_advance(CPUNios2State *env, uintptr_t retaddr)
      * Do this here, rather than in restore_state_to_opc(),
      * lest we affect QEMU internal exceptions, like EXCP_DEBUG.
      */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     env->pc += 4;
     cpu_loop_exit(cs);
 }
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index 09b3c97d7c..ec145960e3 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -45,14 +45,14 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
         break;
 
     case TO_SPR(0, 16): /* NPC */
-        cpu_restore_state(cs, GETPC(), true);
+        cpu_restore_state(cs, GETPC());
         /* ??? Mirror or1ksim in not trashing delayed branch state
            when "jumping" to the current instruction.  */
         if (env->pc != rb) {
             env->pc = rb;
             env->dflag = 0;
-            cpu_loop_exit(cs);
         }
+        cpu_loop_exit(cs);
         break;
 
     case TO_SPR(0, 17): /* SR */
@@ -131,7 +131,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
     case TO_SPR(8, 0):  /* PMR */
         env->pmr = rb;
         if (env->pmr & PMR_DME || env->pmr & PMR_SME) {
-            cpu_restore_state(cs, GETPC(), true);
+            cpu_restore_state(cs, GETPC());
             env->pc += 4;
             cs->halted = 1;
             raise_exception(cpu, EXCP_HALTED);
@@ -199,6 +199,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd,
                            target_ulong spr)
 {
 #ifndef CONFIG_USER_ONLY
+    uint64_t data[TARGET_INSN_START_WORDS];
     MachineState *ms = MACHINE(qdev_get_machine());
     OpenRISCCPU *cpu = env_archcpu(env);
     CPUState *cs = env_cpu(env);
@@ -232,14 +233,20 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd,
         return env->evbar;
 
     case TO_SPR(0, 16): /* NPC (equals PC) */
-        cpu_restore_state(cs, GETPC(), false);
+        if (cpu_unwind_state_data(cs, GETPC(), data)) {
+            return data[0];
+        }
         return env->pc;
 
     case TO_SPR(0, 17): /* SR */
         return cpu_get_sr(env);
 
     case TO_SPR(0, 18): /* PPC */
-        cpu_restore_state(cs, GETPC(), false);
+        if (cpu_unwind_state_data(cs, GETPC(), data)) {
+            if (data[1] & 2) {
+                return data[0] - 4;
+            }
+        }
         return env->ppc;
 
     case TO_SPR(0, 32): /* EPCR */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 09a81561d4..a05a2ed595 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -3075,7 +3075,7 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
     uint32_t insn;
 
     /* Restore state and reload the insn we executed, for filling in DSISR.  */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     insn = cpu_ldl_code(env, env->nip);
 
     switch (env->mmu_model) {
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
index 29ccf70df1..2cd6d062b9 100644
--- a/target/s390x/tcg/excp_helper.c
+++ b/target/s390x/tcg/excp_helper.c
@@ -39,7 +39,7 @@ G_NORETURN void tcg_s390_program_interrupt(CPUS390XState *env,
 {
     CPUState *cs = env_cpu(env);
 
-    cpu_restore_state(cs, ra, true);
+    cpu_restore_state(cs, ra);
     qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
                   env->psw.addr);
     trigger_pgm_exception(env, code);
diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c
index a79c838a92..532ae6b74c 100644
--- a/target/tricore/op_helper.c
+++ b/target/tricore/op_helper.c
@@ -31,7 +31,7 @@ void raise_exception_sync_internal(CPUTriCoreState *env, uint32_t class, int tin
 {
     CPUState *cs = env_cpu(env);
     /* in case we come from a helper-call we need to restore the PC */
-    cpu_restore_state(cs, pc, true);
+    cpu_restore_state(cs, pc);
 
     /* Tin is loaded into d[15] */
     env->gpr_d[15] = tin;
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index e0a9caab4b..2aa9777a8e 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -253,7 +253,7 @@ void xtensa_cpu_do_unaligned_access(CPUState *cs,
 
     assert(xtensa_option_enabled(env->config,
                                  XTENSA_OPTION_UNALIGNED_EXCEPTION));
-    cpu_restore_state(CPU(cpu), retaddr, true);
+    cpu_restore_state(CPU(cpu), retaddr);
     HELPER(exception_cause_vaddr)(env,
                                   env->pc, LOAD_STORE_ALIGNMENT_CAUSE,
                                   addr);
@@ -284,7 +284,7 @@ bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     } else if (probe) {
         return false;
     } else {
-        cpu_restore_state(cs, retaddr, true);
+        cpu_restore_state(cs, retaddr);
         HELPER(exception_cause_vaddr)(env, env->pc, ret, address);
     }
 }
@@ -297,7 +297,7 @@ void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
     XtensaCPU *cpu = XTENSA_CPU(cs);
     CPUXtensaState *env = &cpu->env;
 
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     HELPER(exception_cause_vaddr)(env, env->pc,
                                   access_type == MMU_INST_FETCH ?
                                   INSTR_PIF_ADDR_ERROR_CAUSE :
diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
index 3b751dc3fb..31e6fea1fc 100644
--- a/tcg/sparc/tcg-target-con-set.h
+++ b/tcg/sparc64/tcg-target-con-set.h
@@ -11,22 +11,12 @@
  */
 C_O0_I1(r)
 C_O0_I2(rZ, r)
-C_O0_I2(RZ, r)
 C_O0_I2(rZ, rJ)
-C_O0_I2(RZ, RJ)
-C_O0_I2(sZ, A)
-C_O0_I2(SZ, A)
-C_O1_I1(r, A)
-C_O1_I1(R, A)
+C_O0_I2(sZ, s)
+C_O1_I1(r, s)
 C_O1_I1(r, r)
-C_O1_I1(r, R)
-C_O1_I1(R, r)
-C_O1_I1(R, R)
-C_O1_I2(R, R, R)
+C_O1_I2(r, r, r)
 C_O1_I2(r, rZ, rJ)
-C_O1_I2(R, RZ, RJ)
 C_O1_I4(r, rZ, rJ, rI, 0)
-C_O1_I4(R, RZ, RJ, RI, 0)
 C_O2_I2(r, r, rZ, rJ)
-C_O2_I4(R, R, RZ, RZ, RJ, RI)
 C_O2_I4(r, r, rZ, rZ, rJ, rJ)
diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
index fdb25d9313..8f5c7aef97 100644
--- a/tcg/sparc/tcg-target-con-str.h
+++ b/tcg/sparc64/tcg-target-con-str.h
@@ -9,10 +9,7 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
-REGS('R', ALL_GENERAL_REGS64)
 REGS('s', ALL_QLDST_REGS)
-REGS('S', ALL_QLDST_REGS64)
-REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS)
 
 /*
  * Define constraint letters for constants:
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 72d9552fd0..cb9453efdd 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -22,6 +22,11 @@
  * THE SOFTWARE.
  */
 
+/* We only support generating code for 64-bit mode.  */
+#ifndef __arch64__
+#error "unsupported code generation mode"
+#endif
+
 #include "../tcg-pool.c.inc"
 
 #ifdef CONFIG_DEBUG_TCG
@@ -61,12 +66,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
-#ifdef __arch64__
-# define SPARC64 1
-#else
-# define SPARC64 0
-#endif
-
 #define TCG_CT_CONST_S11  0x100
 #define TCG_CT_CONST_S13  0x200
 #define TCG_CT_CONST_ZERO 0x400
@@ -81,23 +80,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #else
 #define SOFTMMU_RESERVE_REGS 0
 #endif
-
-/*
- * Note that sparcv8plus can only hold 64 bit quantities in %g and %o
- * registers.  These are saved manually by the kernel in full 64-bit
- * slots.  The %i and %l registers are saved by the register window
- * mechanism, which only allocates space for 32 bits.  Given that this
- * window spill/fill can happen on any signal, we must consider the
- * high bits of the %i and %l registers garbage at all times.
- */
 #define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 32)
-#if SPARC64
-# define ALL_GENERAL_REGS64  ALL_GENERAL_REGS
-#else
-# define ALL_GENERAL_REGS64  MAKE_64BIT_MASK(0, 16)
-#endif
 #define ALL_QLDST_REGS       (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
-#define ALL_QLDST_REGS64     (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS)
 
 /* Define some temporary registers.  T2 is used for constant generation.  */
 #define TCG_REG_T1  TCG_REG_G1
@@ -306,11 +290,7 @@ static bool check_fit_i32(int32_t val, unsigned int bits)
 }
 
 #define check_fit_tl    check_fit_i64
-#if SPARC64
-# define check_fit_ptr  check_fit_i64
-#else
-# define check_fit_ptr  check_fit_i32
-#endif
+#define check_fit_ptr   check_fit_i64
 
 static bool patch_reloc(tcg_insn_unit *src_rw, int type,
                         intptr_t value, intptr_t addend)
@@ -573,11 +553,6 @@ static void tcg_out_sety(TCGContext *s, TCGReg rs)
     tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
 }
 
-static void tcg_out_rdy(TCGContext *s, TCGReg rd)
-{
-    tcg_out32(s, RDY | INSN_RD(rd));
-}
-
 static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
                           int32_t val2, int val2const, int uns)
 {
@@ -914,9 +889,7 @@ static void emit_extend(TCGContext *s, TCGReg r, int op)
         tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
         break;
     case MO_32:
-        if (SPARC64) {
-            tcg_out_arith(s, r, r, 0, SHIFT_SRL);
-        }
+        tcg_out_arith(s, r, r, 0, SHIFT_SRL);
         break;
     case MO_64:
         break;
@@ -948,7 +921,6 @@ static void build_trampolines(TCGContext *s)
     };
 
     int i;
-    TCGReg ra;
 
     for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
         if (qemu_ld_helpers[i] == NULL) {
@@ -961,16 +933,8 @@ static void build_trampolines(TCGContext *s)
         }
         qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
 
-        if (SPARC64 || TARGET_LONG_BITS == 32) {
-            ra = TCG_REG_O3;
-        } else {
-            /* Install the high part of the address.  */
-            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
-            ra = TCG_REG_O4;
-        }
-
         /* Set the retaddr operand.  */
-        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
+        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
         /* Tail call.  */
         tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
         /* delay slot -- set the env argument */
@@ -988,37 +952,10 @@ static void build_trampolines(TCGContext *s)
         }
         qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
 
-        if (SPARC64) {
-            emit_extend(s, TCG_REG_O2, i);
-            ra = TCG_REG_O4;
-        } else {
-            ra = TCG_REG_O1;
-            if (TARGET_LONG_BITS == 64) {
-                /* Install the high part of the address.  */
-                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
-                ra += 2;
-            } else {
-                ra += 1;
-            }
-            if ((i & MO_SIZE) == MO_64) {
-                /* Install the high part of the data.  */
-                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
-                ra += 2;
-            } else {
-                emit_extend(s, ra, i);
-                ra += 1;
-            }
-            /* Skip the oi argument.  */
-            ra += 1;
-        }
-                
+        emit_extend(s, TCG_REG_O2, i);
+
         /* Set the retaddr operand.  */
-        if (ra >= TCG_REG_O6) {
-            tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
-                       TCG_TARGET_CALL_STACK_OFFSET);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
-        }
+        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
 
         /* Tail call.  */
         tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
@@ -1047,11 +984,6 @@ static void build_trampolines(TCGContext *s)
             qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
         }
 
-        if (!SPARC64 && TARGET_LONG_BITS == 64) {
-            /* Install the high part of the address.  */
-            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
-        }
-
         /* Tail call.  */
         tcg_out_jmpl_const(s, helper, true, true);
         /* delay slot -- set the env argument */
@@ -1182,7 +1114,7 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
     tcg_out_cmp(s, r0, r2, 0);
 
     /* If the guest address must be zero-extended, do so now.  */
-    if (SPARC64 && TARGET_LONG_BITS == 32) {
+    if (TARGET_LONG_BITS == 32) {
         tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
         return r0;
     }
@@ -1231,7 +1163,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
 
 #ifdef CONFIG_SOFTMMU
     unsigned memi = get_mmuidx(oi);
-    TCGReg addrz, param;
+    TCGReg addrz;
     const tcg_insn_unit *func;
 
     addrz = tcg_out_tlb_load(s, addr, memi, memop,
@@ -1251,12 +1183,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
 
     /* TLB Miss.  */
 
-    param = TCG_REG_O1;
-    if (!SPARC64 && TARGET_LONG_BITS == 64) {
-        /* Skip the high-part; we'll perform the extract in the trampoline.  */
-        param++;
-    }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
 
     /* We use the helpers to extend SB and SW data, leaving the case
        of SL needing explicit extending below.  */
@@ -1268,30 +1195,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
     tcg_debug_assert(func != NULL);
     tcg_out_call_nodelay(s, func, false);
     /* delay slot */
-    tcg_out_movi(s, TCG_TYPE_I32, param, oi);
-
-    /* Recall that all of the helpers return 64-bit results.
-       Which complicates things for sparcv8plus.  */
-    if (SPARC64) {
-        /* We let the helper sign-extend SB and SW, but leave SL for here.  */
-        if (is_64 && (memop & MO_SSIZE) == MO_SL) {
-            tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
-        }
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
+
+    /* We let the helper sign-extend SB and SW, but leave SL for here.  */
+    if (is_64 && (memop & MO_SSIZE) == MO_SL) {
+        tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
     } else {
-        if ((memop & MO_SIZE) == MO_64) {
-            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
-            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
-            tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
-        } else if (is_64) {
-            /* Re-extend from 32-bit rather than reassembling when we
-               know the high register must be an extension.  */
-            tcg_out_arithi(s, data, TCG_REG_O1, 0,
-                           memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
-        }
+        tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
     }
 
     *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
@@ -1301,7 +1211,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
     unsigned s_bits = memop & MO_SIZE;
     unsigned t_bits;
 
-    if (SPARC64 && TARGET_LONG_BITS == 32) {
+    if (TARGET_LONG_BITS == 32) {
         tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
         addr = TCG_REG_T1;
     }
@@ -1337,10 +1247,9 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
          * operation in the delay slot, and failure need only invoke the
          * handler for SIGBUS.
          */
-        TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64);
         tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
         /* delay slot -- move to low part of argument reg */
-        tcg_out_mov_delay(s, arg_low, addr);
+        tcg_out_mov_delay(s, TCG_REG_O1, addr);
     } else {
         /* Underalignment: load by pieces of minimum alignment. */
         int ld_opc, a_size, s_size, i;
@@ -1400,7 +1309,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
 
 #ifdef CONFIG_SOFTMMU
     unsigned memi = get_mmuidx(oi);
-    TCGReg addrz, param;
+    TCGReg addrz;
     const tcg_insn_unit *func;
 
     addrz = tcg_out_tlb_load(s, addr, memi, memop,
@@ -1418,23 +1327,14 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
 
     /* TLB Miss.  */
 
-    param = TCG_REG_O1;
-    if (!SPARC64 && TARGET_LONG_BITS == 64) {
-        /* Skip the high-part; we'll perform the extract in the trampoline.  */
-        param++;
-    }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
-    if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
-        /* Skip the high-part; we'll perform the extract in the trampoline.  */
-        param++;
-    }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, data);
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data);
 
     func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
     tcg_debug_assert(func != NULL);
     tcg_out_call_nodelay(s, func, false);
     /* delay slot */
-    tcg_out_movi(s, TCG_TYPE_I32, param, oi);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
 
     *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
 #else
@@ -1443,7 +1343,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
     unsigned s_bits = memop & MO_SIZE;
     unsigned t_bits;
 
-    if (SPARC64 && TARGET_LONG_BITS == 32) {
+    if (TARGET_LONG_BITS == 32) {
         tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
         addr = TCG_REG_T1;
     }
@@ -1479,10 +1379,9 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
          * operation in the delay slot, and failure need only invoke the
          * handler for SIGBUS.
          */
-        TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64);
         tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
         /* delay slot -- move to low part of argument reg */
-        tcg_out_mov_delay(s, arg_low, addr);
+        tcg_out_mov_delay(s, TCG_REG_O1, addr);
     } else {
         /* Underalignment: store by pieces of minimum alignment. */
         int st_opc, a_size, s_size, i;
@@ -1719,14 +1618,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_muls2_i32:
         c = ARITH_SMUL;
     do_mul2:
-        /* The 32-bit multiply insns produce a full 64-bit result.  If the
-           destination register can hold it, we can avoid the slower RDY.  */
+        /* The 32-bit multiply insns produce a full 64-bit result. */
         tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
-        if (SPARC64 || a0 <= TCG_REG_O7) {
-            tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
-        } else {
-            tcg_out_rdy(s, a1);
-        }
+        tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
         break;
 
     case INDEX_op_qemu_ld_i32:
@@ -1833,107 +1727,91 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O0_I1(r);
 
     case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
     case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
     case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
     case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
     case INDEX_op_ld_i32:
+    case INDEX_op_ld32u_i64:
+    case INDEX_op_ld32s_i64:
+    case INDEX_op_ld_i64:
     case INDEX_op_neg_i32:
+    case INDEX_op_neg_i64:
     case INDEX_op_not_i32:
+    case INDEX_op_not_i64:
+    case INDEX_op_ext32s_i64:
+    case INDEX_op_ext32u_i64:
+    case INDEX_op_ext_i32_i64:
+    case INDEX_op_extu_i32_i64:
+    case INDEX_op_extrl_i64_i32:
+    case INDEX_op_extrh_i64_i32:
         return C_O1_I1(r, r);
 
     case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
     case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
     case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+    case INDEX_op_st_i64:
         return C_O0_I2(rZ, r);
 
     case INDEX_op_add_i32:
+    case INDEX_op_add_i64:
     case INDEX_op_mul_i32:
+    case INDEX_op_mul_i64:
     case INDEX_op_div_i32:
+    case INDEX_op_div_i64:
     case INDEX_op_divu_i32:
+    case INDEX_op_divu_i64:
     case INDEX_op_sub_i32:
+    case INDEX_op_sub_i64:
     case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
     case INDEX_op_andc_i32:
+    case INDEX_op_andc_i64:
     case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
     case INDEX_op_orc_i32:
+    case INDEX_op_orc_i64:
     case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
     case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
     case INDEX_op_shr_i32:
+    case INDEX_op_shr_i64:
     case INDEX_op_sar_i32:
+    case INDEX_op_sar_i64:
     case INDEX_op_setcond_i32:
+    case INDEX_op_setcond_i64:
         return C_O1_I2(r, rZ, rJ);
 
     case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
         return C_O0_I2(rZ, rJ);
     case INDEX_op_movcond_i32:
+    case INDEX_op_movcond_i64:
         return C_O1_I4(r, rZ, rJ, rI, 0);
     case INDEX_op_add2_i32:
+    case INDEX_op_add2_i64:
     case INDEX_op_sub2_i32:
+    case INDEX_op_sub2_i64:
         return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
     case INDEX_op_mulu2_i32:
     case INDEX_op_muls2_i32:
         return C_O2_I2(r, r, rZ, rJ);
-
-    case INDEX_op_ld8u_i64:
-    case INDEX_op_ld8s_i64:
-    case INDEX_op_ld16u_i64:
-    case INDEX_op_ld16s_i64:
-    case INDEX_op_ld32u_i64:
-    case INDEX_op_ld32s_i64:
-    case INDEX_op_ld_i64:
-    case INDEX_op_ext_i32_i64:
-    case INDEX_op_extu_i32_i64:
-        return C_O1_I1(R, r);
-
-    case INDEX_op_st8_i64:
-    case INDEX_op_st16_i64:
-    case INDEX_op_st32_i64:
-    case INDEX_op_st_i64:
-        return C_O0_I2(RZ, r);
-
-    case INDEX_op_add_i64:
-    case INDEX_op_mul_i64:
-    case INDEX_op_div_i64:
-    case INDEX_op_divu_i64:
-    case INDEX_op_sub_i64:
-    case INDEX_op_and_i64:
-    case INDEX_op_andc_i64:
-    case INDEX_op_or_i64:
-    case INDEX_op_orc_i64:
-    case INDEX_op_xor_i64:
-    case INDEX_op_shl_i64:
-    case INDEX_op_shr_i64:
-    case INDEX_op_sar_i64:
-    case INDEX_op_setcond_i64:
-        return C_O1_I2(R, RZ, RJ);
-
-    case INDEX_op_neg_i64:
-    case INDEX_op_not_i64:
-    case INDEX_op_ext32s_i64:
-    case INDEX_op_ext32u_i64:
-        return C_O1_I1(R, R);
-
-    case INDEX_op_extrl_i64_i32:
-    case INDEX_op_extrh_i64_i32:
-        return C_O1_I1(r, R);
-
-    case INDEX_op_brcond_i64:
-        return C_O0_I2(RZ, RJ);
-    case INDEX_op_movcond_i64:
-        return C_O1_I4(R, RZ, RJ, RI, 0);
-    case INDEX_op_add2_i64:
-    case INDEX_op_sub2_i64:
-        return C_O2_I4(R, R, RZ, RZ, RJ, RI);
     case INDEX_op_muluh_i64:
-        return C_O1_I2(R, R, R);
+        return C_O1_I2(r, r, r);
 
     case INDEX_op_qemu_ld_i32:
-        return C_O1_I1(r, A);
     case INDEX_op_qemu_ld_i64:
-        return C_O1_I1(R, A);
+        return C_O1_I1(r, s);
     case INDEX_op_qemu_st_i32:
-        return C_O0_I2(sZ, A);
     case INDEX_op_qemu_st_i64:
-        return C_O0_I2(SZ, A);
+        return C_O0_I2(sZ, s);
 
     default:
         g_assert_not_reached();
@@ -1954,7 +1832,7 @@ static void tcg_target_init(TCGContext *s)
 #endif
 
     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
-    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64;
+    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
 
     tcg_target_call_clobber_regs = 0;
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
@@ -1984,16 +1862,11 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
 }
 
-#if SPARC64
-# define ELF_HOST_MACHINE  EM_SPARCV9
-#else
-# define ELF_HOST_MACHINE  EM_SPARC32PLUS
-# define ELF_HOST_FLAGS    EF_SPARC_32PLUS
-#endif
+#define ELF_HOST_MACHINE  EM_SPARCV9
 
 typedef struct {
     DebugFrameHeader h;
-    uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
+    uint8_t fde_def_cfa[4];
     uint8_t fde_win_save;
     uint8_t fde_ret_save[3];
 } DebugFrame;
@@ -2010,12 +1883,8 @@ static const DebugFrame debug_frame = {
     .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
 
     .fde_def_cfa = {
-#if SPARC64
         12, 30,                         /* DW_CFA_def_cfa i6, 2047 */
         (2047 & 0x7f) | 0x80, (2047 >> 7)
-#else
-        13, 30                          /* DW_CFA_def_cfa_register i6 */
-#endif
     },
     .fde_win_save = 0x2d,               /* DW_CFA_GNU_window_save */
     .fde_ret_save = { 9, 15, 31 },      /* DW_CFA_register o7, i7 */
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc64/tcg-target.h
index c050763049..8655acdbe5 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -25,8 +25,6 @@
 #ifndef SPARC_TCG_TARGET_H
 #define SPARC_TCG_TARGET_H
 
-#define TCG_TARGET_REG_BITS 64
-
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 #define TCG_TARGET_NB_REGS 32
@@ -70,19 +68,10 @@ typedef enum {
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_O6
 
-#ifdef __arch64__
 #define TCG_TARGET_STACK_BIAS           2047
 #define TCG_TARGET_STACK_ALIGN          16
 #define TCG_TARGET_CALL_STACK_OFFSET    (128 + 6*8 + TCG_TARGET_STACK_BIAS)
-#else
-#define TCG_TARGET_STACK_BIAS           0
-#define TCG_TARGET_STACK_ALIGN          8
-#define TCG_TARGET_CALL_STACK_OFFSET    (64 + 4 + 6*4)
-#endif
-
-#ifdef __arch64__
 #define TCG_TARGET_EXTEND_ARGS 1
-#endif
 
 #if defined(__VIS__) && __VIS__ >= 0x300
 #define use_vis3_instructions  1
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 612a12f58f..b6c46b7e25 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -634,9 +634,9 @@ static void tcg_context_init(unsigned max_cpus)
 
         if (nargs != 0) {
             ca->cif.arg_types = ca->args;
-            for (i = 0; i < nargs; ++i) {
-                int typecode = extract32(typemask, (i + 1) * 3, 3);
-                ca->args[i] = typecode_to_ffi[typecode];
+            for (int j = 0; j < nargs; ++j) {
+                int typecode = extract32(typemask, (j + 1) * 3, 3);
+                ca->args[j] = typecode_to_ffi[typecode];
             }
         }
 
@@ -1487,39 +1487,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
     }
 #endif
 
-#if defined(__sparc__) && !defined(__arch64__) \
-    && !defined(CONFIG_TCG_INTERPRETER)
-    /* We have 64-bit values in one register, but need to pass as two
-       separate parameters.  Split them.  */
-    int orig_typemask = typemask;
-    int orig_nargs = nargs;
-    TCGv_i64 retl, reth;
-    TCGTemp *split_args[MAX_OPC_PARAM];
-
-    retl = NULL;
-    reth = NULL;
-    typemask = 0;
-    for (i = real_args = 0; i < nargs; ++i) {
-        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
-        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
-
-        if (is_64bit) {
-            TCGv_i64 orig = temp_tcgv_i64(args[i]);
-            TCGv_i32 h = tcg_temp_new_i32();
-            TCGv_i32 l = tcg_temp_new_i32();
-            tcg_gen_extr_i64_i32(l, h, orig);
-            split_args[real_args++] = tcgv_i32_temp(h);
-            typemask |= dh_typecode_i32 << (real_args * 3);
-            split_args[real_args++] = tcgv_i32_temp(l);
-            typemask |= dh_typecode_i32 << (real_args * 3);
-        } else {
-            split_args[real_args++] = args[i];
-            typemask |= argtype << (real_args * 3);
-        }
-    }
-    nargs = real_args;
-    args = split_args;
-#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
     for (i = 0; i < nargs; ++i) {
         int argtype = extract32(typemask, (i + 1) * 3, 3);
         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
@@ -1542,22 +1510,6 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
 
     pi = 0;
     if (ret != NULL) {
-#if defined(__sparc__) && !defined(__arch64__) \
-    && !defined(CONFIG_TCG_INTERPRETER)
-        if ((typemask & 6) == dh_typecode_i64) {
-            /* The 32-bit ABI is going to return the 64-bit value in
-               the %o0/%o1 register pair.  Prepare for this by using
-               two return temporaries, and reassemble below.  */
-            retl = tcg_temp_new_i64();
-            reth = tcg_temp_new_i64();
-            op->args[pi++] = tcgv_i64_arg(reth);
-            op->args[pi++] = tcgv_i64_arg(retl);
-            nb_rets = 2;
-        } else {
-            op->args[pi++] = temp_arg(ret);
-            nb_rets = 1;
-        }
-#else
         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
 #if HOST_BIG_ENDIAN
             op->args[pi++] = temp_arg(ret + 1);
@@ -1571,7 +1523,6 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
             op->args[pi++] = temp_arg(ret);
             nb_rets = 1;
         }
-#endif
     } else {
         nb_rets = 0;
     }
@@ -1634,29 +1585,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
 
-#if defined(__sparc__) && !defined(__arch64__) \
-    && !defined(CONFIG_TCG_INTERPRETER)
-    /* Free all of the parts we allocated above.  */
-    for (i = real_args = 0; i < orig_nargs; ++i) {
-        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
-        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
-
-        if (is_64bit) {
-            tcg_temp_free_internal(args[real_args++]);
-            tcg_temp_free_internal(args[real_args++]);
-        } else {
-            real_args++;
-        }
-    }
-    if ((orig_typemask & 6) == dh_typecode_i64) {
-        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
-           Note that describing these as TCGv_i64 eliminates an unnecessary
-           zero-extension that tcg_gen_concat_i32_i64 would create.  */
-        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
-        tcg_temp_free_i64(retl);
-        tcg_temp_free_i64(reth);
-    }
-#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
     for (i = 0; i < nargs; ++i) {
         int argtype = extract32(typemask, (i + 1) * 3, 3);
         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 78104f9bbb..5f0fee1aad 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -36,6 +36,9 @@ threadcount: LDFLAGS+=-lpthread
 
 signals: LDFLAGS+=-lrt -lpthread
 
+munmap-pthread: CFLAGS+=-pthread
+munmap-pthread: LDFLAGS+=-pthread
+
 # We define the runner for test-mmap after the individual
 # architectures have defined their supported pages sizes. If no
 # additional page sizes are defined we only run the default test.
diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c
new file mode 100644
index 0000000000..d7143b00d5
--- /dev/null
+++ b/tests/tcg/multiarch/munmap-pthread.c
@@ -0,0 +1,79 @@
+/* Test that munmap() and thread creation do not race. */
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static const char nop_func[] = {
+#if defined(__aarch64__)
+    0xc0, 0x03, 0x5f, 0xd6,     /* ret */
+#elif defined(__alpha__)
+    0x01, 0x80, 0xFA, 0x6B,     /* ret */
+#elif defined(__arm__)
+    0x1e, 0xff, 0x2f, 0xe1,     /* bx lr */
+#elif defined(__riscv)
+    0x67, 0x80, 0x00, 0x00,     /* ret */
+#elif defined(__s390__)
+    0x07, 0xfe,                 /* br %r14 */
+#elif defined(__i386__) || defined(__x86_64__)
+    0xc3,                       /* ret */
+#endif
+};
+
+static void *thread_mmap_munmap(void *arg)
+{
+    volatile bool *run = arg;
+    char *p;
+    int ret;
+
+    while (*run) {
+        p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE | PROT_EXEC,
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        assert(p != MAP_FAILED);
+
+        /* Create a small translation block.  */
+        memcpy(p, nop_func, sizeof(nop_func));
+        ((void(*)(void))p)();
+
+        ret = munmap(p, getpagesize());
+        assert(ret == 0);
+    }
+
+    return NULL;
+}
+
+static void *thread_dummy(void *arg)
+{
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t mmap_munmap, dummy;
+    volatile bool run = true;
+    int i, ret;
+
+    /* Without a template, nothing to test. */
+    if (sizeof(nop_func) == 0) {
+        return EXIT_SUCCESS;
+    }
+
+    ret = pthread_create(&mmap_munmap, NULL, thread_mmap_munmap, (void *)&run);
+    assert(ret == 0);
+
+    for (i = 0; i < 1000; i++) {
+        ret = pthread_create(&dummy, NULL, thread_dummy, NULL);
+        assert(ret == 0);
+        ret = pthread_join(dummy, NULL);
+        assert(ret == 0);
+    }
+
+    run = false;
+    ret = pthread_join(mmap_munmap, NULL);
+    assert(ret == 0);
+
+    return EXIT_SUCCESS;
+}