63 files changed, 3335 insertions, 1850 deletions
diff --git a/Makefile.objs b/Makefile.objs index 82e8a1e10a..06f74b8b99 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -89,7 +89,7 @@ endif ####################################################################### # Target-independent parts used in system and user emulation -common-obj-y += tcg-runtime.o cpus-common.o +common-obj-y += cpus-common.o common-obj-y += hw/ common-obj-y += qom/ common-obj-y += disas/ diff --git a/Makefile.target b/Makefile.target index 2c46091225..7a5080e94a 100644 --- a/Makefile.target +++ b/Makefile.target @@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o obj-y += fpu/softfloat.o obj-y += target-$(TARGET_BASE_ARCH)/ obj-y += disas.o +obj-y += tcg-runtime.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/atomic_template.h b/atomic_template.h new file mode 100644 index 0000000000..b400b2a3d3 --- /dev/null +++ b/atomic_template.h @@ -0,0 +1,215 @@ +/* + * Atomic helper templates + * Included from tcg-runtime.c and cputlb.c. + * + * Copyright (c) 2016 Red Hat, Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#if DATA_SIZE == 16 +# define SUFFIX o +# define DATA_TYPE Int128 +# define BSWAP bswap128 +#elif DATA_SIZE == 8 +# define SUFFIX q +# define DATA_TYPE uint64_t +# define BSWAP bswap64 +#elif DATA_SIZE == 4 +# define SUFFIX l +# define DATA_TYPE uint32_t +# define BSWAP bswap32 +#elif DATA_SIZE == 2 +# define SUFFIX w +# define DATA_TYPE uint16_t +# define BSWAP bswap16 +#elif DATA_SIZE == 1 +# define SUFFIX b +# define DATA_TYPE uint8_t +# define BSWAP +#else +# error unsupported data size +#endif + +#if DATA_SIZE >= 4 +# define ABI_TYPE DATA_TYPE +#else +# define ABI_TYPE uint32_t +#endif + +/* Define host-endian atomic operations. Note that END is used within + the ATOMIC_NAME macro, and redefined below. 
*/ +#if DATA_SIZE == 1 +# define END +#elif defined(HOST_WORDS_BIGENDIAN) +# define END _be +#else +# define END _le +#endif + +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return atomic_cmpxchg__nocheck(haddr, cmpv, newv); +} + +#if DATA_SIZE >= 16 +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +{ + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + __atomic_load(haddr, &val, __ATOMIC_RELAXED); + return val; +} + +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} +#else +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return atomic_xchg__nocheck(haddr, val); +} + +#define GEN_ATOMIC_HELPER(X) \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ + ABI_TYPE val EXTRA_ARGS) \ +{ \ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + return atomic_##X(haddr, val); \ +} \ + +GEN_ATOMIC_HELPER(fetch_add) +GEN_ATOMIC_HELPER(fetch_and) +GEN_ATOMIC_HELPER(fetch_or) +GEN_ATOMIC_HELPER(fetch_xor) +GEN_ATOMIC_HELPER(add_fetch) +GEN_ATOMIC_HELPER(and_fetch) +GEN_ATOMIC_HELPER(or_fetch) +GEN_ATOMIC_HELPER(xor_fetch) + +#undef GEN_ATOMIC_HELPER +#endif /* DATA SIZE >= 16 */ + +#undef END + +#if DATA_SIZE > 1 + +/* Define reverse-host-endian atomic operations. Note that END is used + within the ATOMIC_NAME macro. */ +#ifdef HOST_WORDS_BIGENDIAN +# define END _le +#else +# define END _be +#endif + +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv))); +} + +#if DATA_SIZE >= 16 +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +{ + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + __atomic_load(haddr, &val, __ATOMIC_RELAXED); + return BSWAP(val); +} + +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + val = BSWAP(val); + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} +#else +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val))); +} + +#define GEN_ATOMIC_HELPER(X) \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ + ABI_TYPE val EXTRA_ARGS) \ +{ \ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + return BSWAP(atomic_##X(haddr, BSWAP(val))); \ +} + +GEN_ATOMIC_HELPER(fetch_and) +GEN_ATOMIC_HELPER(fetch_or) +GEN_ATOMIC_HELPER(fetch_xor) +GEN_ATOMIC_HELPER(and_fetch) +GEN_ATOMIC_HELPER(or_fetch) +GEN_ATOMIC_HELPER(xor_fetch) + +#undef GEN_ATOMIC_HELPER + +/* Note that for addition, we need to use a separate cmpxchg loop instead + of bswaps for the reverse-host-endian helpers. 
*/ +ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE ldo, ldn, ret, sto; + + ldo = atomic_read__nocheck(haddr); + while (1) { + ret = BSWAP(ldo); + sto = BSWAP(ret + val); + ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto); + if (ldn == ldo) { + return ret; + } + ldo = ldn; + } +} + +ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE ldo, ldn, ret, sto; + + ldo = atomic_read__nocheck(haddr); + while (1) { + ret = BSWAP(ldo) + val; + sto = BSWAP(ret); + ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto); + if (ldn == ldo) { + return ret; + } + ldo = ldn; + } +} +#endif /* DATA_SIZE >= 16 */ + +#undef END +#endif /* DATA_SIZE > 1 */ + +#undef BSWAP +#undef ABI_TYPE +#undef DATA_TYPE +#undef SUFFIX +#undef DATA_SIZE diff --git a/configure b/configure index d3dafcbb37..8e10059607 100755 --- a/configure +++ b/configure @@ -1216,7 +1216,10 @@ case "$cpu" in cc_i386='$(CC) -m32' ;; x86_64) - CPU_CFLAGS="-m64" + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + CPU_CFLAGS="-m64 -mcx16" LDFLAGS="-m64 $LDFLAGS" cc_i386='$(CC) -m32' ;; @@ -4521,6 +4524,55 @@ if compile_prog "" "" ; then int128=yes fi +######################################### +# See if 128-bit atomic operations are supported. + +atomic128=no +if test "$int128" = "yes"; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + y = __atomic_load_16(&x, 0); + __atomic_store_16(&x, y, 0); + __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" ; then + atomic128=yes + fi +fi + +######################################### +# See if 64-bit atomic operations are supported. +# Note that without __atomic builtins, we can only +# assume atomic loads/stores max at pointer size. + +cat > $TMPC << EOF +#include <stdint.h> +int main(void) +{ + uint64_t x = 0, y = 0; +#ifdef __ATOMIC_RELAXED + y = __atomic_load_8(&x, 0); + __atomic_store_8(&x, y, 0); + __atomic_compare_exchange_8(&x, &y, x, 0, 0, 0); + __atomic_exchange_8(&x, y, 0); + __atomic_fetch_add_8(&x, y, 0); +#else + typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1]; + __sync_lock_test_and_set(&x, y); + __sync_val_compare_and_swap(&x, y, 0); + __sync_fetch_and_add(&x, y); +#endif + return 0; +} +EOF +if compile_prog "" "" ; then + atomic64=yes +fi + ######################################## # check if getauxval is available. 
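The fetch_add and add_fetch helpers just above are the one place the reverse-host-endian template cannot lean on byte swaps alone: swapping bytes does not commute with an addition's carry chain, so the helpers retry a compare-and-swap until no other thread has intervened between the load and the store. Below is a minimal standalone sketch of that loop, assuming the GCC/clang __atomic builtins that the new configure tests probe for; fetch_add_swapped is a hypothetical name used purely for illustration, not a QEMU helper.

    #include <stdint.h>

    /* Sketch of the reverse-host-endian fetch_add retry loop. */
    static uint32_t fetch_add_swapped(uint32_t *haddr, uint32_t val)
    {
        uint32_t ldo = __atomic_load_n(haddr, __ATOMIC_RELAXED);

        for (;;) {
            uint32_t ret = __builtin_bswap32(ldo);       /* old value, guest order */
            uint32_t sto = __builtin_bswap32(ret + val); /* new value, memory order */
            uint32_t exp = ldo;

            /* On failure, exp is updated with the value actually observed,
             * matching the ldn = atomic_cmpxchg__nocheck() step above. */
            if (__atomic_compare_exchange_n(haddr, &exp, sto, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
                return ret;                              /* fetch_add returns old */
            }
            ldo = exp;
        }
    }

AND, OR and XOR escape this loop because bswap distributes over bitwise operations, so those helpers can simply swap the operand, apply the host atomic, and swap the result back.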
@@ -5483,6 +5535,14 @@ if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi +if test "$atomic128" = "yes" ; then + echo "CONFIG_ATOMIC128=y" >> $config_host_mak +fi + +if test "$atomic64" = "yes" ; then + echo "CONFIG_ATOMIC64=y" >> $config_host_mak +fi + if test "$getauxval" = "yes" ; then echo "CONFIG_GETAUXVAL=y" >> $config_host_mak fi diff --git a/cpu-exec-common.c b/cpu-exec-common.c index 0cb4ae60ef..767d9c6f0c 100644 --- a/cpu-exec-common.c +++ b/cpu-exec-common.c @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) } siglongjmp(cpu->jmp_env, 1); } + +void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc) +{ + cpu->exception_index = EXCP_ATOMIC; + cpu_loop_exit_restore(cpu, pc); +} diff --git a/cpu-exec.c b/cpu-exec.c index e114fcdf29..9400732137 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -151,12 +151,6 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) && qemu_log_in_addr_range(itb->pc)) { #if defined(TARGET_I386) log_cpu_state(cpu, CPU_DUMP_CCOP); -#elif defined(TARGET_M68K) - /* ??? Should not modify env state for dumping. */ - cpu_m68k_flush_flags(env, env->cc_op); - env->cc_op = CC_OP_FLAGS; - env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4); - log_cpu_state(cpu, 0); #else log_cpu_state(cpu, 0); #endif @@ -222,6 +216,36 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, } #endif +static void cpu_exec_step(CPUState *cpu) +{ + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + TranslationBlock *tb; + target_ulong cs_base, pc; + uint32_t flags; + + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + tb = tb_gen_code(cpu, pc, cs_base, flags, + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); + tb->orig_tb = NULL; + /* execute the generated code */ + trace_exec_tb_nocache(tb, pc); + cpu_tb_exec(cpu, tb); + tb_phys_invalidate(tb, -1); + tb_free(tb); +} + +void cpu_exec_step_atomic(CPUState *cpu) +{ + start_exclusive(); + + /* Since we got here, we know that parallel_cpus must be true. 
*/ + parallel_cpus = false; + cpu_exec_step(cpu); + parallel_cpus = true; + + end_exclusive(); +} + struct tb_desc { target_ulong pc; target_ulong cs_base; diff --git a/cpus.c b/cpus.c index 31204bb4b3..cfd5cdc0e3 100644 --- a/cpus.c +++ b/cpus.c @@ -1497,6 +1497,8 @@ static void tcg_exec_all(void) if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); break; + } else if (r == EXCP_ATOMIC) { + cpu_exec_step_atomic(cpu); } } else if (cpu->stop || cpu->stopped) { if (cpu->unplug) { diff --git a/cputlb.c b/cputlb.c index 3c99c34ac8..cc4da4d7eb 100644 --- a/cputlb.c +++ b/cputlb.c @@ -23,15 +23,15 @@ #include "exec/memory.h" #include "exec/address-spaces.h" #include "exec/cpu_ldst.h" - #include "exec/cputlb.h" - #include "exec/memory-internal.h" #include "exec/ram_addr.h" #include "exec/exec-all.h" #include "tcg/tcg.h" #include "qemu/error-report.h" #include "exec/log.h" +#include "exec/helper-proto.h" +#include "qemu/atomic.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -498,6 +498,43 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, + target_ulong addr, uintptr_t retaddr, int size) +{ + CPUState *cpu = ENV_GET_CPU(env); + hwaddr physaddr = iotlbentry->addr; + MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + uint64_t val; + + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; + cpu->mem_io_pc = retaddr; + if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + cpu_io_recompile(cpu, retaddr); + } + + cpu->mem_io_vaddr = addr; + memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs); + return val; +} + +static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, + uint64_t val, target_ulong addr, + uintptr_t retaddr, int size) +{ + CPUState *cpu = ENV_GET_CPU(env); + hwaddr physaddr = iotlbentry->addr; + MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; + if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + cpu_io_recompile(cpu, retaddr); + } + + cpu->mem_io_vaddr = addr; + cpu->mem_io_pc = retaddr; + memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs); +} + /* Return true if ADDR is present in the victim tlb, and has been copied back to the main tlb. */ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, @@ -527,34 +564,178 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ (ADDR) & TARGET_PAGE_MASK) +/* Probe for whether the specified guest write access is permitted. + * If it is not permitted then an exception will be taken in the same + * way as if this were a real write access (and we will not return). + * Otherwise the function will return, and there will be a valid + * entry in the TLB for this access. 
+ */ +void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, + uintptr_t retaddr) +{ + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + + if ((addr & TARGET_PAGE_MASK) + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + /* TLB entry is for a different page */ + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + } +} + +/* Probe for a read-modify-write atomic operation. Do not allow unaligned + * operations, or io operations to proceed. Return the host address. */ +static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + size_t mmu_idx = get_mmuidx(oi); + size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; + target_ulong tlb_addr = tlbe->addr_write; + TCGMemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + int s_bits = mop & MO_SIZE; + + /* Adjust the given return address. */ + retaddr -= GETPC_ADJ; + + /* Enforce guest required alignment. */ + if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { + /* ??? Maybe indicate atomic op to cpu_unaligned_access */ + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* Enforce qemu required alignment. */ + if (unlikely(addr & ((1 << s_bits) - 1))) { + /* We get here if guest alignment was not requested, + or was not enforced by cpu_unaligned_access above. + We might widen the access and emulate, but for now + mark an exception and exit the cpu loop. */ + goto stop_the_world; + } + + /* Check TLB entry and enforce page permissions. */ + if ((addr & TARGET_PAGE_MASK) + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + tlb_addr = tlbe->addr_write; + } + + /* Notice an IO access, or a notdirty page. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + /* There's really nothing that can be done to + support this apart from stop-the-world. */ + goto stop_the_world; + } + + /* Let the guest notice RMW on a write-only page. */ + if (unlikely(tlbe->addr_read != tlb_addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_LOAD, mmu_idx, retaddr); + /* Since we don't support reads and writes to different addresses, + and we do have the proper page loaded for write, this shouldn't + ever return. But just in case, handle via stop-the-world. */ + goto stop_the_world; + } + + return (void *)((uintptr_t)addr + tlbe->addend); + + stop_the_world: + cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr); +} + +#ifdef TARGET_WORDS_BIGENDIAN +# define TGT_BE(X) (X) +# define TGT_LE(X) BSWAP(X) +#else +# define TGT_BE(X) BSWAP(X) +# define TGT_LE(X) (X) +#endif + #define MMUSUFFIX _mmu -#define SHIFT 0 +#define DATA_SIZE 1 #include "softmmu_template.h" -#define SHIFT 1 +#define DATA_SIZE 2 #include "softmmu_template.h" -#define SHIFT 2 +#define DATA_SIZE 4 #include "softmmu_template.h" -#define SHIFT 3 +#define DATA_SIZE 8 #include "softmmu_template.h" -#undef MMUSUFFIX +/* First set of helpers allows passing in of OI and RETADDR. This makes + them callable from other helpers. 
*/ + +#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr +#define ATOMIC_NAME(X) \ + HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) + +#define DATA_SIZE 1 +#include "atomic_template.h" + +#define DATA_SIZE 2 +#include "atomic_template.h" + +#define DATA_SIZE 4 +#include "atomic_template.h" + +#ifdef CONFIG_ATOMIC64 +#define DATA_SIZE 8 +#include "atomic_template.h" +#endif + +#ifdef CONFIG_ATOMIC128 +#define DATA_SIZE 16 +#include "atomic_template.h" +#endif + +/* Second set of helpers are directly callable from TCG as helpers. */ + +#undef EXTRA_ARGS +#undef ATOMIC_NAME +#undef ATOMIC_MMU_LOOKUP +#define EXTRA_ARGS , TCGMemOpIdx oi +#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) + +#define DATA_SIZE 1 +#include "atomic_template.h" + +#define DATA_SIZE 2 +#include "atomic_template.h" + +#define DATA_SIZE 4 +#include "atomic_template.h" + +#ifdef CONFIG_ATOMIC64 +#define DATA_SIZE 8 +#include "atomic_template.h" +#endif + +/* Code access functions. */ + +#undef MMUSUFFIX #define MMUSUFFIX _cmmu #undef GETPC #define GETPC() ((uintptr_t)0) #define SOFTMMU_CODE_ACCESS -#define SHIFT 0 +#define DATA_SIZE 1 #include "softmmu_template.h" -#define SHIFT 1 +#define DATA_SIZE 2 #include "softmmu_template.h" -#define SHIFT 2 +#define DATA_SIZE 4 #include "softmmu_template.h" -#define SHIFT 3 +#define DATA_SIZE 8 #include "softmmu_template.h" diff --git a/exec.c b/exec.c index 587b489eef..4c84389b56 100644 --- a/exec.c +++ b/exec.c @@ -352,9 +352,9 @@ static inline bool section_covers_addr(const MemoryRegionSection *section, /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means * the section must cover the entire address space. 
*/ - return section->size.hi || + return int128_gethi(section->size) || range_covers_byte(section->offset_within_address_space, - section->size.lo, addr); + int128_getlo(section->size), addr); } static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr, diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c index cd95340cd9..537face94d 100644 --- a/hw/audio/intel-hda.c +++ b/hw/audio/intel-hda.c @@ -416,7 +416,8 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t stnr, bool output, } left = len; - while (left > 0) { + s = st->bentries; + while (left > 0 && s-- > 0) { copy = left; if (copy > st->bsize - st->lpib) copy = st->bsize - st->lpib; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index fa6fd0e53f..60bce94d6b 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -333,6 +333,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g, qemu_log_mask(LOG_GUEST_ERROR, "%s: host couldn't handle guest format %d\n", __func__, c2d.format); + g_free(res); cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; return; } diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index 6505983c12..2b11499829 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -1278,11 +1278,10 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; - memset(d, 0, sizeof(*d)); - assert(!rss_info->enabled); d->length = cpu_to_le16(length); + d->csum = 0; e1000e_build_rx_metadata(core, pkt, pkt != NULL, rss_info, @@ -1291,6 +1290,7 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, &d->special); d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); d->status = (uint8_t) le32_to_cpu(status_flags); + d->special = 0; } static inline void @@ -1301,7 +1301,7 @@ e1000e_write_ext_rx_descr(E1000ECore *core, uint8_t *desc, { union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; - memset(d, 0, sizeof(*d)); + memset(&d->wb, 0, sizeof(d->wb)); d->wb.upper.length = cpu_to_le16(length); @@ -1325,7 +1325,7 @@ e1000e_write_ps_rx_descr(E1000ECore *core, uint8_t *desc, union e1000_rx_desc_packet_split *d = (union e1000_rx_desc_packet_split *) desc; - memset(d, 0, sizeof(*d)); + memset(&d->wb, 0, sizeof(d->wb)); d->wb.middle.length0 = cpu_to_le16((*written)[0]); diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index bab4dbfc98..4bf71f2d85 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1843,6 +1843,7 @@ static void pci_nic_uninit(PCIDevice *pci_dev) EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev); vmstate_unregister(&pci_dev->qdev, s->vmstate, s); + g_free(s->vmstate); eeprom93xx_free(&pci_dev->qdev, s->eeprom); qemu_del_nic(s->nic); } diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c index 198a01f92d..654455355f 100644 --- a/hw/net/pcnet.c +++ b/hw/net/pcnet.c @@ -302,7 +302,7 @@ static inline void pcnet_tmd_load(PCNetState *s, struct pcnet_TMD *tmd, uint32_t tbadr; int16_t length; int16_t status; - } xda; + } xda; s->phys_mem_read(s->dma_opaque, addr, (void *)&xda, sizeof(xda), 0); tmd->tbadr = le32_to_cpu(xda.tbadr) & 0xffffff; tmd->length = le16_to_cpu(xda.length); @@ -664,7 +664,9 @@ static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size) static inline hwaddr pcnet_rdra_addr(PCNetState *s, int idx) { - while (idx < 1) idx += CSR_RCVRL(s); + while (idx < 1) { + idx += CSR_RCVRL(s); + } return s->rdra + ((CSR_RCVRL(s) - idx) * (BCR_SWSTYLE(s) ? 
16 : 8)); } @@ -672,8 +674,10 @@ static inline int64_t pcnet_get_next_poll_time(PCNetState *s, int64_t current_ti { int64_t next_time = current_time + (65536 - (CSR_SPND(s) ? 0 : CSR_POLL(s))) * 30; - if (next_time <= current_time) + + if (next_time <= current_time) { next_time = current_time + 1; + } return next_time; } @@ -795,13 +799,13 @@ static void pcnet_init(PCNetState *s) mode = le16_to_cpu(initblk.mode); rlen = initblk.rlen >> 4; tlen = initblk.tlen >> 4; - ladrf[0] = le16_to_cpu(initblk.ladrf[0]); - ladrf[1] = le16_to_cpu(initblk.ladrf[1]); - ladrf[2] = le16_to_cpu(initblk.ladrf[2]); - ladrf[3] = le16_to_cpu(initblk.ladrf[3]); - padr[0] = le16_to_cpu(initblk.padr[0]); - padr[1] = le16_to_cpu(initblk.padr[1]); - padr[2] = le16_to_cpu(initblk.padr[2]); + ladrf[0] = le16_to_cpu(initblk.ladrf[0]); + ladrf[1] = le16_to_cpu(initblk.ladrf[1]); + ladrf[2] = le16_to_cpu(initblk.ladrf[2]); + ladrf[3] = le16_to_cpu(initblk.ladrf[3]); + padr[0] = le16_to_cpu(initblk.padr[0]); + padr[1] = le16_to_cpu(initblk.padr[1]); + padr[2] = le16_to_cpu(initblk.padr[2]); rdra = le32_to_cpu(initblk.rdra); tdra = le32_to_cpu(initblk.tdra); } else { @@ -809,13 +813,13 @@ static void pcnet_init(PCNetState *s) s->phys_mem_read(s->dma_opaque, PHYSADDR(s,CSR_IADR(s)), (uint8_t *)&initblk, sizeof(initblk), 0); mode = le16_to_cpu(initblk.mode); - ladrf[0] = le16_to_cpu(initblk.ladrf[0]); - ladrf[1] = le16_to_cpu(initblk.ladrf[1]); - ladrf[2] = le16_to_cpu(initblk.ladrf[2]); - ladrf[3] = le16_to_cpu(initblk.ladrf[3]); - padr[0] = le16_to_cpu(initblk.padr[0]); - padr[1] = le16_to_cpu(initblk.padr[1]); - padr[2] = le16_to_cpu(initblk.padr[2]); + ladrf[0] = le16_to_cpu(initblk.ladrf[0]); + ladrf[1] = le16_to_cpu(initblk.ladrf[1]); + ladrf[2] = le16_to_cpu(initblk.ladrf[2]); + ladrf[3] = le16_to_cpu(initblk.ladrf[3]); + padr[0] = le16_to_cpu(initblk.padr[0]); + padr[1] = le16_to_cpu(initblk.padr[1]); + padr[2] = le16_to_cpu(initblk.padr[2]); rdra = le32_to_cpu(initblk.rdra); tdra = le32_to_cpu(initblk.tdra); rlen = rdra >> 29; @@ -858,12 +862,12 @@ static void pcnet_start(PCNetState *s) printf("pcnet_start\n"); #endif - if (!CSR_DTX(s)) + if (!CSR_DTX(s)) { s->csr[0] |= 0x0010; /* set TXON */ - - if (!CSR_DRX(s)) + } + if (!CSR_DRX(s)) { s->csr[0] |= 0x0020; /* set RXON */ - + } s->csr[0] &= ~0x0004; /* clear STOP bit */ s->csr[0] |= 0x0002; pcnet_poll_timer(s); @@ -925,8 +929,7 @@ static void pcnet_rdte_poll(PCNetState *s) crda); } } else { - printf("pcnet: BAD RMD RDA=0x" TARGET_FMT_plx "\n", - crda); + printf("pcnet: BAD RMD RDA=0x" TARGET_FMT_plx "\n", crda); #endif } } @@ -1168,10 +1171,11 @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_) #endif while (pktcount--) { - if (CSR_RCVRC(s) <= 1) + if (CSR_RCVRC(s) <= 1) { CSR_RCVRC(s) = CSR_RCVRL(s); - else + } else { CSR_RCVRC(s)--; + } } pcnet_rdte_poll(s); @@ -1207,7 +1211,7 @@ static void pcnet_transmit(PCNetState *s) s->tx_busy = 1; - txagain: +txagain: if (pcnet_tdte_poll(s)) { struct pcnet_TMD tmd; @@ -1251,7 +1255,7 @@ static void pcnet_transmit(PCNetState *s) s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr), s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s)); s->xmit_pos += bcnt; - + if (!GET_FIELD(tmd.status, TMDS, ENP)) { goto txdone; } @@ -1276,21 +1280,22 @@ static void pcnet_transmit(PCNetState *s) s->csr[4] |= 0x0004; /* set TXSTRT */ s->xmit_pos = -1; - txdone: +txdone: SET_FIELD(&tmd.status, TMDS, OWN, 0); TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s))); - if (!CSR_TOKINTD(s) || (CSR_LTINTEN(s) && GET_FIELD(tmd.status, TMDS, 
LTINT))) + if (!CSR_TOKINTD(s) + || (CSR_LTINTEN(s) && GET_FIELD(tmd.status, TMDS, LTINT))) { s->csr[0] |= 0x0200; /* set TINT */ - - if (CSR_XMTRC(s)<=1) + } + if (CSR_XMTRC(s) <= 1) { CSR_XMTRC(s) = CSR_XMTRL(s); - else + } else { CSR_XMTRC(s)--; - if (count--) + } + if (count--) { goto txagain; - - } else - if (s->xmit_pos >= 0) { + } + } else if (s->xmit_pos >= 0) { struct pcnet_TMD tmd; TMDLOAD(&tmd, xmit_cxda); SET_FIELD(&tmd.misc, TMDM, BUFF, 1); @@ -1301,9 +1306,9 @@ static void pcnet_transmit(PCNetState *s) s->csr[0] |= 0x0200; /* set TINT */ if (!CSR_DXSUFLO(s)) { s->csr[0] &= ~0x0010; - } else - if (count--) - goto txagain; + } else if (count--) { + goto txagain; + } } s->tx_busy = 0; @@ -1315,13 +1320,11 @@ static void pcnet_poll(PCNetState *s) pcnet_rdte_poll(s); } - if (CSR_TDMD(s) || - (CSR_TXON(s) && !CSR_DPOLL(s) && pcnet_tdte_poll(s))) - { + if (CSR_TDMD(s) || (CSR_TXON(s) && !CSR_DPOLL(s) && pcnet_tdte_poll(s))) { /* prevent recursion */ - if (s->tx_busy) + if (s->tx_busy) { return; - + } pcnet_transmit(s); } } @@ -1340,15 +1343,16 @@ static void pcnet_poll_timer(void *opaque) if (!CSR_STOP(s) && !CSR_SPND(s) && !CSR_DPOLL(s)) { uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) * 33; - if (!s->timer || !now) + if (!s->timer || !now) { s->timer = now; - else { + } else { uint64_t t = now - s->timer + CSR_POLL(s); if (t > 0xffffLL) { pcnet_poll(s); CSR_POLL(s) = CSR_PINT(s); - } else + } else { CSR_POLL(s) = t; + } } timer_mod(s->poll_timer, pcnet_get_next_poll_time(s,qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL))); @@ -1371,21 +1375,21 @@ static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value) val = (val & 0x007f) | (s->csr[0] & 0x7f00); /* IFF STOP, STRT and INIT are set, clear STRT and INIT */ - if ((val&7) == 7) - val &= ~3; - - if (!CSR_STOP(s) && (val & 4)) + if ((val & 7) == 7) { + val &= ~3; + } + if (!CSR_STOP(s) && (val & 4)) { pcnet_stop(s); - - if (!CSR_INIT(s) && (val & 1)) + } + if (!CSR_INIT(s) && (val & 1)) { pcnet_init(s); - - if (!CSR_STRT(s) && (val & 2)) + } + if (!CSR_STRT(s) && (val & 2)) { pcnet_start(s); - - if (CSR_TDMD(s)) + } + if (CSR_TDMD(s)) { pcnet_transmit(s); - + } return; case 1: case 2: @@ -1429,12 +1433,16 @@ static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value) case 47: /* POLLINT */ case 72: case 74: + break; case 76: /* RCVRL */ case 78: /* XMTRL */ + val = (val > 0) ? 
val : 512; + break; case 112: - if (CSR_STOP(s) || CSR_SPND(s)) - break; - return; + if (CSR_STOP(s) || CSR_SPND(s)) { + break; + } + return; case 3: break; case 4: @@ -1651,8 +1659,7 @@ void pcnet_ioport_writel(void *opaque, uint32_t addr, uint32_t val) pcnet_bcr_writew(s, s->rap, val & 0xffff); break; } - } else - if ((addr & 0x0f) == 0) { + } else if ((addr & 0x0f) == 0) { /* switch device to dword i/o mode */ pcnet_bcr_writew(s, BCR_BSBC, pcnet_bcr_readw(s, BCR_BSBC) | 0x0080); #ifdef PCNET_DEBUG_IO diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 30f2ce417b..e9d215aa4d 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -860,7 +860,7 @@ static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val) rocker_msix_irq(r, val); break; case ROCKER_TEST_DMA_SIZE: - r->test_dma_size = val; + r->test_dma_size = val & 0xFFFF; break; case ROCKER_TEST_DMA_ADDR + 4: r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32; diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 3345bc6b5e..f05e59c85f 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -2350,7 +2350,7 @@ static void rtl8139_cplus_transmit(RTL8139State *s) { int txcount = 0; - while (rtl8139_cplus_transmit_one(s)) + while (txcount < 64 && rtl8139_cplus_transmit_one(s)) { ++txcount; } diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 90f6943668..92f6af9620 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -531,6 +531,7 @@ static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx) VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring); + memset(&txcq_descr, 0, sizeof(txcq_descr)); txcq_descr.txdIdx = tx_ridx; txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring); diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 861260d3db..e9004e5798 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -31,6 +31,7 @@ #define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ #define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */ +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */ /* some important defines: * diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 9797d556e8..cb624e4acc 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -59,6 +59,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); #if !defined(CONFIG_USER_ONLY) void cpu_reloading_memory_map(void); diff --git a/include/qemu-common.h b/include/qemu-common.h index 7e6e4feb4b..1430390eb6 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -80,6 +80,7 @@ void tcg_exec_init(unsigned long tb_size); bool tcg_enabled(void); void cpu_exec_init_all(void); +void cpu_exec_step_atomic(CPUState *cpu); /** * set_preferred_target_page_bits: diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index c09fce704f..878fa0700d 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -99,15 +99,21 @@ * no effect on the generated code but not using the atomic primitives * will get flagged by sanitizers as a violation. 
*/ +#define atomic_read__nocheck(ptr) \ + __atomic_load_n(ptr, __ATOMIC_RELAXED) + #define atomic_read(ptr) \ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - __atomic_load_n(ptr, __ATOMIC_RELAXED); \ + atomic_read__nocheck(ptr); \ }) +#define atomic_set__nocheck(ptr, i) \ + __atomic_store_n(ptr, i, __ATOMIC_RELAXED) + #define atomic_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - __atomic_store_n(ptr, i, __ATOMIC_RELAXED); \ + atomic_set__nocheck(ptr, i); \ } while(0) /* See above: most compilers currently treat consume and acquire the @@ -151,20 +157,27 @@ /* All the remaining operations are fully sequentially consistent */ +#define atomic_xchg__nocheck(ptr, i) ({ \ + __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST); \ +}) + #define atomic_xchg(ptr, i) ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - __atomic_exchange_n(ptr, i, __ATOMIC_SEQ_CST); \ + atomic_xchg__nocheck(ptr, i); \ }) /* Returns the eventual value, failed or not */ -#define atomic_cmpxchg(ptr, old, new) \ - ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ +#define atomic_cmpxchg__nocheck(ptr, old, new) ({ \ typeof_strip_qual(*ptr) _old = (old); \ __atomic_compare_exchange_n(ptr, &_old, new, false, \ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ _old; \ - }) +}) + +#define atomic_cmpxchg(ptr, old, new) ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + atomic_cmpxchg__nocheck(ptr, old, new); \ +}) /* Provide shorter names for GCC atomic builtins, return old value */ #define atomic_fetch_inc(ptr) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST) @@ -173,6 +186,15 @@ #define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST) #define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST) #define atomic_fetch_or(ptr, n) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST) + +#define atomic_inc_fetch(ptr) __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST) +#define atomic_dec_fetch(ptr) __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST) +#define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_or_fetch(ptr, n) __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST) /* And even shorter names that return void. 
*/ #define atomic_inc(ptr) ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)) @@ -181,6 +203,7 @@ #define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)) #define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)) #define atomic_or(ptr, n) ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)) +#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)) #else /* __ATOMIC_RELAXED */ @@ -269,8 +292,11 @@ /* These will only be atomic if the processor does the fetch or store * in a single issue memory operation */ -#define atomic_read(ptr) (*(__typeof__(*ptr) volatile*) (ptr)) -#define atomic_set(ptr, i) ((*(__typeof__(*ptr) volatile*) (ptr)) = (i)) +#define atomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p)) +#define atomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i)) + +#define atomic_read(ptr) atomic_read__nocheck(ptr) +#define atomic_set(ptr, i) atomic_set__nocheck(ptr,i) /** * atomic_rcu_read - reads a RCU-protected pointer to a local variable @@ -331,15 +357,27 @@ #define atomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i)) #endif #endif +#define atomic_xchg__nocheck atomic_xchg /* Provide shorter names for GCC atomic builtins. */ #define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) -#define atomic_fetch_add __sync_fetch_and_add -#define atomic_fetch_sub __sync_fetch_and_sub -#define atomic_fetch_and __sync_fetch_and_and -#define atomic_fetch_or __sync_fetch_and_or -#define atomic_cmpxchg __sync_val_compare_and_swap +#define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) +#define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) +#define atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) +#define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) +#define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) + +#define atomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) +#define atomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) +#define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) +#define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) +#define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) +#define atomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) +#define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) + +#define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new) +#define atomic_cmpxchg__nocheck(ptr, old, new) atomic_cmpxchg(ptr, old, new) /* And even shorter names that return void. 
*/ #define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) @@ -348,6 +386,7 @@ #define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) #define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) #define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) +#define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n)) #endif /* __ATOMIC_RELAXED */ diff --git a/include/qemu/int128.h b/include/qemu/int128.h index c5988813df..5c9890db8b 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -1,6 +1,149 @@ #ifndef INT128_H #define INT128_H +#ifdef CONFIG_INT128 +#include "qemu/bswap.h" + +typedef __int128_t Int128; + +static inline Int128 int128_make64(uint64_t a) +{ + return a; +} + +static inline Int128 int128_make128(uint64_t lo, uint64_t hi) +{ + return (__uint128_t)hi << 64 | lo; +} + +static inline uint64_t int128_get64(Int128 a) +{ + uint64_t r = a; + assert(r == a); + return r; +} + +static inline uint64_t int128_getlo(Int128 a) +{ + return a; +} + +static inline int64_t int128_gethi(Int128 a) +{ + return a >> 64; +} + +static inline Int128 int128_zero(void) +{ + return 0; +} + +static inline Int128 int128_one(void) +{ + return 1; +} + +static inline Int128 int128_2_64(void) +{ + return (Int128)1 << 64; +} + +static inline Int128 int128_exts64(int64_t a) +{ + return a; +} + +static inline Int128 int128_and(Int128 a, Int128 b) +{ + return a & b; +} + +static inline Int128 int128_rshift(Int128 a, int n) +{ + return a >> n; +} + +static inline Int128 int128_add(Int128 a, Int128 b) +{ + return a + b; +} + +static inline Int128 int128_neg(Int128 a) +{ + return -a; +} + +static inline Int128 int128_sub(Int128 a, Int128 b) +{ + return a - b; +} + +static inline bool int128_nonneg(Int128 a) +{ + return a >= 0; +} + +static inline bool int128_eq(Int128 a, Int128 b) +{ + return a == b; +} + +static inline bool int128_ne(Int128 a, Int128 b) +{ + return a != b; +} + +static inline bool int128_ge(Int128 a, Int128 b) +{ + return a >= b; +} + +static inline bool int128_lt(Int128 a, Int128 b) +{ + return a < b; +} + +static inline bool int128_le(Int128 a, Int128 b) +{ + return a <= b; +} + +static inline bool int128_gt(Int128 a, Int128 b) +{ + return a > b; +} + +static inline bool int128_nz(Int128 a) +{ + return a != 0; +} + +static inline Int128 int128_min(Int128 a, Int128 b) +{ + return a < b ? a : b; +} + +static inline Int128 int128_max(Int128 a, Int128 b) +{ + return a > b ? 
a : b; +} + +static inline void int128_addto(Int128 *a, Int128 b) +{ + *a += b; +} + +static inline void int128_subfrom(Int128 *a, Int128 b) +{ + *a -= b; +} + +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +} + +#else /* !CONFIG_INT128 */ typedef struct Int128 Int128; @@ -14,12 +157,27 @@ static inline Int128 int128_make64(uint64_t a) return (Int128) { a, 0 }; } +static inline Int128 int128_make128(uint64_t lo, uint64_t hi) +{ + return (Int128) { lo, hi }; +} + static inline uint64_t int128_get64(Int128 a) { assert(!a.hi); return a.lo; } +static inline uint64_t int128_getlo(Int128 a) +{ + return a.lo; +} + +static inline int64_t int128_gethi(Int128 a) +{ + return a.hi; +} + static inline Int128 int128_zero(void) { return int128_make64(0); @@ -53,9 +211,9 @@ static inline Int128 int128_rshift(Int128 a, int n) } h = a.hi >> (n & 63); if (n >= 64) { - return (Int128) { h, h >> 63 }; + return int128_make128(h, h >> 63); } else { - return (Int128) { (a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h }; + return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h); } } @@ -69,18 +227,18 @@ static inline Int128 int128_add(Int128 a, Int128 b) * * So the carry is lo < a.lo. */ - return (Int128) { lo, (uint64_t)a.hi + b.hi + (lo < a.lo) }; + return int128_make128(lo, (uint64_t)a.hi + b.hi + (lo < a.lo)); } static inline Int128 int128_neg(Int128 a) { uint64_t lo = -a.lo; - return (Int128) { lo, ~(uint64_t)a.hi + !lo }; + return int128_make128(lo, ~(uint64_t)a.hi + !lo); } static inline Int128 int128_sub(Int128 a, Int128 b) { - return (Int128){ a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo) }; + return int128_make128(a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo)); } static inline bool int128_nonneg(Int128 a) @@ -143,4 +301,5 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a = int128_sub(*a, b); } -#endif +#endif /* CONFIG_INT128 */ +#endif /* INT128_H */ diff --git a/linux-user/main.c b/linux-user/main.c index 54970bc4d9..75b199f274 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -354,6 +354,9 @@ void cpu_loop(CPUX86State *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: pc = env->segs[R_CS].base + env->eip; EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", @@ -550,94 +553,6 @@ do_kernel_trap(CPUARMState *env) return 0; } -/* Store exclusive handling for AArch32 */ -static int do_strex(CPUARMState *env) -{ - uint64_t val; - int size; - int rc = 1; - int segv = 0; - uint32_t addr; - start_exclusive(); - if (env->exclusive_addr != env->exclusive_test) { - goto fail; - } - /* We know we're always AArch32 so the address is in uint32_t range - * unless it was the -1 exclusive-monitor-lost value (which won't - * match exclusive_test above). 
- */ - assert(extract64(env->exclusive_addr, 32, 32) == 0); - addr = env->exclusive_addr; - size = env->exclusive_info & 0xf; - switch (size) { - case 0: - segv = get_user_u8(val, addr); - break; - case 1: - segv = get_user_data_u16(val, addr, env); - break; - case 2: - case 3: - segv = get_user_data_u32(val, addr, env); - break; - default: - abort(); - } - if (segv) { - env->exception.vaddress = addr; - goto done; - } - if (size == 3) { - uint32_t valhi; - segv = get_user_data_u32(valhi, addr + 4, env); - if (segv) { - env->exception.vaddress = addr + 4; - goto done; - } - if (arm_cpu_bswap_data(env)) { - val = deposit64((uint64_t)valhi, 32, 32, val); - } else { - val = deposit64(val, 32, 32, valhi); - } - } - if (val != env->exclusive_val) { - goto fail; - } - - val = env->regs[(env->exclusive_info >> 8) & 0xf]; - switch (size) { - case 0: - segv = put_user_u8(val, addr); - break; - case 1: - segv = put_user_data_u16(val, addr, env); - break; - case 2: - case 3: - segv = put_user_data_u32(val, addr, env); - break; - } - if (segv) { - env->exception.vaddress = addr; - goto done; - } - if (size == 3) { - val = env->regs[(env->exclusive_info >> 12) & 0xf]; - segv = put_user_data_u32(val, addr + 4, env); - if (segv) { - env->exception.vaddress = addr + 4; - goto done; - } - } - rc = 0; -fail: - env->regs[15] += 4; - env->regs[(env->exclusive_info >> 4) & 0xf] = rc; -done: - end_exclusive(); - return segv; -} - void cpu_loop(CPUARMState *env) { CPUState *cs = CPU(arm_env_get_cpu(env)); @@ -812,11 +727,6 @@ void cpu_loop(CPUARMState *env) case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; - case EXCP_STREX: - if (!do_strex(env)) { - break; - } - /* fall through for segv */ case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: addr = env->exception.vaddress; @@ -851,6 +761,9 @@ void cpu_loop(CPUARMState *env) case EXCP_YIELD: /* nothing to do here for user-mode, just resume guest code */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: error: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); @@ -862,124 +775,6 @@ void cpu_loop(CPUARMState *env) #else -/* - * Handle AArch64 store-release exclusive - * - * rs = gets the status result of store exclusive - * rt = is the register that is stored - * rt2 = is the second register store (in STP) - * - */ -static int do_strex_a64(CPUARMState *env) -{ - uint64_t val; - int size; - bool is_pair; - int rc = 1; - int segv = 0; - uint64_t addr; - int rs, rt, rt2; - - start_exclusive(); - /* size | is_pair << 2 | (rs << 4) | (rt << 9) | (rt2 << 14)); */ - size = extract32(env->exclusive_info, 0, 2); - is_pair = extract32(env->exclusive_info, 2, 1); - rs = extract32(env->exclusive_info, 4, 5); - rt = extract32(env->exclusive_info, 9, 5); - rt2 = extract32(env->exclusive_info, 14, 5); - - addr = env->exclusive_addr; - - if (addr != env->exclusive_test) { - goto finish; - } - - switch (size) { - case 0: - segv = get_user_u8(val, addr); - break; - case 1: - segv = get_user_u16(val, addr); - break; - case 2: - segv = get_user_u32(val, addr); - break; - case 3: - segv = get_user_u64(val, addr); - break; - default: - abort(); - } - if (segv) { - env->exception.vaddress = addr; - goto error; - } - if (val != env->exclusive_val) { - goto finish; - } - if (is_pair) { - if (size == 2) { - segv = get_user_u32(val, addr + 4); - } else { - segv = get_user_u64(val, addr + 8); - } - if (segv) { - env->exception.vaddress = addr + (size == 2 ? 
4 : 8); - goto error; - } - if (val != env->exclusive_high) { - goto finish; - } - } - /* handle the zero register */ - val = rt == 31 ? 0 : env->xregs[rt]; - switch (size) { - case 0: - segv = put_user_u8(val, addr); - break; - case 1: - segv = put_user_u16(val, addr); - break; - case 2: - segv = put_user_u32(val, addr); - break; - case 3: - segv = put_user_u64(val, addr); - break; - } - if (segv) { - goto error; - } - if (is_pair) { - /* handle the zero register */ - val = rt2 == 31 ? 0 : env->xregs[rt2]; - if (size == 2) { - segv = put_user_u32(val, addr + 4); - } else { - segv = put_user_u64(val, addr + 8); - } - if (segv) { - env->exception.vaddress = addr + (size == 2 ? 4 : 8); - goto error; - } - } - rc = 0; -finish: - env->pc += 4; - /* rs == 31 encodes a write to the ZR, thus throwing away - * the status return. This is rather silly but valid. - */ - if (rs < 31) { - env->xregs[rs] = rc; - } -error: - /* instruction faulted, PC does not advance */ - /* either way a strex releases any exclusive lock we have */ - env->exclusive_addr = -1; - end_exclusive(); - return segv; -} - /* AArch64 main loop */ void cpu_loop(CPUARMState *env) { @@ -1021,11 +816,6 @@ void cpu_loop(CPUARMState *env) info._sifields._sigfault._addr = env->pc; queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; - case EXCP_STREX: - if (!do_strex_a64(env)) { - break; - } - /* fall through for segv */ case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: info.si_signo = TARGET_SIGSEGV; @@ -1051,6 +841,9 @@ void cpu_loop(CPUARMState *env) case EXCP_YIELD: /* nothing to do here for user-mode, just resume guest code */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); abort(); @@ -1058,8 +851,6 @@ void cpu_loop(CPUARMState *env) process_pending_signals(env); /* Exception return on AArch64 always clears the exclusive monitor, * so any return to running guest code implies this. - * A strex (successful or otherwise) also clears the monitor, so - * we don't need to specialcase EXCP_STREX. */ env->exclusive_addr = -1; } @@ -1142,6 +933,9 @@ void cpu_loop(CPUUniCore32State *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: goto error; } @@ -1415,6 +1209,9 @@ void cpu_loop (CPUSPARCState *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -1954,6 +1751,9 @@ void cpu_loop(CPUPPCState *env) case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: cpu_abort(cs, "Unknown exception 0x%x. 
Aborting\n", trapnr); break; @@ -2649,6 +2449,9 @@ done_syscall: } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: error: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); @@ -2736,6 +2539,9 @@ void cpu_loop(CPUOpenRISCState *env) case EXCP_NR: qemu_log_mask(CPU_LOG_INT, "\nNR\n"); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n", trapnr); @@ -2812,6 +2618,9 @@ void cpu_loop(CPUSH4State *env) queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -2879,6 +2688,9 @@ void cpu_loop(CPUCRISState *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -2995,6 +2807,9 @@ void cpu_loop(CPUMBState *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -3098,6 +2913,9 @@ void cpu_loop(CPUM68KState *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); abort(); @@ -3108,51 +2926,6 @@ void cpu_loop(CPUM68KState *env) #endif /* TARGET_M68K */ #ifdef TARGET_ALPHA -static void do_store_exclusive(CPUAlphaState *env, int reg, int quad) -{ - target_ulong addr, val, tmp; - target_siginfo_t info; - int ret = 0; - - addr = env->lock_addr; - tmp = env->lock_st_addr; - env->lock_addr = -1; - env->lock_st_addr = 0; - - start_exclusive(); - mmap_lock(); - - if (addr == tmp) { - if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) { - goto do_sigsegv; - } - - if (val == env->lock_value) { - tmp = env->ir[reg]; - if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) { - goto do_sigsegv; - } - ret = 1; - } - } - env->ir[reg] = ret; - env->pc += 4; - - mmap_unlock(); - end_exclusive(); - return; - - do_sigsegv: - mmap_unlock(); - end_exclusive(); - - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = addr; - queue_signal(env, TARGET_SIGSEGV, QEMU_SI_FAULT, &info); -} - void cpu_loop(CPUAlphaState *env) { CPUState *cs = CPU(alpha_env_get_cpu(env)); @@ -3327,13 +3100,12 @@ void cpu_loop(CPUAlphaState *env) queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; - case EXCP_STL_C: - case EXCP_STQ_C: - do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C); - break; case EXCP_INTERRUPT: /* Just indicate that signals should be handled asap. 
*/ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -3463,6 +3235,9 @@ void cpu_loop(CPUS390XState *env) queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -3717,6 +3492,9 @@ void cpu_loop(CPUTLGState *env) case TILEGX_EXCP_REG_UDN_ACCESS: gen_sigill_reg(env); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr); g_assert_not_reached(); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index db697c0bf3..7b77503f94 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6164,6 +6164,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, sigfillset(&sigmask); sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask); + /* If this is our first additional thread, we need to ensure we + * generate code for parallel execution and flush old translations. + */ + if (!parallel_cpus) { + parallel_cpus = true; + tb_flush(cpu); + } + ret = pthread_create(&info.thread, &attr, clone_func, &info); /* TODO: Free new CPU state if thread creation failed. */ diff --git a/net/colo-compare.c b/net/colo-compare.c index 109990fd8e..f791383dbc 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -188,7 +188,6 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) { struct tcphdr *ptcp, *stcp; int res; - char *sdebug, *ddebug; trace_colo_compare_main("compare tcp"); if (ppkt->size != spkt->size) { @@ -219,24 +218,21 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) (spkt->size - ETH_HLEN)); if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { - sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); - ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); - fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u" - " s: seq/ack=%u/%u res=%d flags=%x/%x\n", - __func__, sdebug, ddebug, - (unsigned int)ntohl(ptcp->th_seq), - (unsigned int)ntohl(ptcp->th_ack), - (unsigned int)ntohl(stcp->th_seq), - (unsigned int)ntohl(stcp->th_ack), - res, ptcp->th_flags, stcp->th_flags); - - fprintf(stderr, "Primary len = %d\n", ppkt->size); - qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size); - fprintf(stderr, "Secondary len = %d\n", spkt->size); - qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size); - - g_free(sdebug); - g_free(ddebug); + trace_colo_compare_pkt_info(inet_ntoa(ppkt->ip->ip_src), + inet_ntoa(ppkt->ip->ip_dst), + ntohl(ptcp->th_seq), + ntohl(ptcp->th_ack), + ntohl(stcp->th_seq), + ntohl(stcp->th_ack), + res, ptcp->th_flags, + stcp->th_flags, + ppkt->size, + spkt->size); + + qemu_hexdump((char *)ppkt->data, stderr, + "colo-compare ppkt", ppkt->size); + qemu_hexdump((char *)spkt->data, stderr, + "colo-compare spkt", spkt->size); } return res; diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c index 89abe72d4e..c4ab91cdee 100644 --- a/net/filter-rewriter.c +++ b/net/filter-rewriter.c @@ -68,15 +68,11 @@ static int handle_primary_tcp_pkt(NetFilterState *nf, tcp_pkt = (struct tcphdr *)pkt->transport_header; if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { - char *sdebug, *ddebug; - sdebug = strdup(inet_ntoa(pkt->ip->ip_src)); - ddebug = strdup(inet_ntoa(pkt->ip->ip_dst)); - trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug, + 
trace_colo_filter_rewriter_pkt_info(__func__, + inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst), ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), tcp_pkt->th_flags); trace_colo_filter_rewriter_conn_offset(conn->offset); - g_free(sdebug); - g_free(ddebug); } if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { @@ -116,15 +112,11 @@ static int handle_secondary_tcp_pkt(NetFilterState *nf, tcp_pkt = (struct tcphdr *)pkt->transport_header; if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { - char *sdebug, *ddebug; - sdebug = strdup(inet_ntoa(pkt->ip->ip_src)); - ddebug = strdup(inet_ntoa(pkt->ip->ip_dst)); - trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug, + trace_colo_filter_rewriter_pkt_info(__func__, + inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst), ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), tcp_pkt->th_flags); trace_colo_filter_rewriter_conn_offset(conn->offset); - g_free(sdebug); - g_free(ddebug); } if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { @@ -162,6 +154,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, iov_to_buf(iov, iovcnt, 0, buf, size); pkt = packet_new(buf, size); + g_free(buf); /* * if we get tcp packet diff --git a/net/tap-bsd.c b/net/tap-bsd.c index c506ac31d6..6c9692263d 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -35,6 +35,10 @@ #include <net/if_tap.h> #endif +#if defined(__OpenBSD__) +#include <sys/param.h> +#endif + #ifndef __FreeBSD__ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required, int mq_required, Error **errp) @@ -55,7 +59,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, if (*ifname) { snprintf(dname, sizeof dname, "/dev/%s", ifname); } else { -#if defined(__OpenBSD__) +#if defined(__OpenBSD__) && OpenBSD < 201605 snprintf(dname, sizeof dname, "/dev/tun%d", i); #else snprintf(dname, sizeof dname, "/dev/tap%d", i); diff --git a/net/trace-events b/net/trace-events index d67f048825..b1913a6666 100644 --- a/net/trace-events +++ b/net/trace-events @@ -13,6 +13,7 @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" colo_old_packet_check_found(int64_t old_time) "%" PRId64 colo_compare_miscompare(void) "" +colo_compare_pkt_info(const char *src, const char *dst, uint32_t pseq, uint32_t pack, uint32_t sseq, uint32_t sack, int res, uint32_t pflag, uint32_t sflag, int psize, int ssize) "src/dst: %s/%s p: seq/ack=%u/%u s: seq/ack=%u/%u res=%d flags=%x/%x ppkt_size: %d spkt_size: %d\n" # net/filter-rewriter.c colo_filter_rewriter_debug(void) "" diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index 9885e83cd4..85f61028e2 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -21,7 +21,8 @@ PUBLIC = True def is_string(arg): strtype = ('const char*', 'char*', 'const char *', 'char *') - if arg.lstrip().startswith(strtype): + arg_strip = arg.lstrip() + if arg_strip.startswith(strtype) and arg_strip.count('*') == 1: return True else: return False diff --git a/softmmu_template.h b/softmmu_template.h index 27ed2694df..4a2b6653f6 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -21,12 +21,6 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "qemu/timer.h" -#include "exec/address-spaces.h" -#include "exec/memory.h" - -#define DATA_SIZE (1 << SHIFT) - #if DATA_SIZE == 8 #define SUFFIX q #define LSUFFIX q @@ -84,14 +78,6 @@ # define BSWAP(X) (X) #endif -#ifdef TARGET_WORDS_BIGENDIAN -# define TGT_BE(X) (X) -# define TGT_LE(X) BSWAP(X) -#else -# define TGT_BE(X) BSWAP(X) -# define TGT_LE(X) (X) -#endif - #if DATA_SIZE == 1 # define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) # define helper_be_ld_name helper_le_ld_name @@ -108,35 +94,14 @@ # define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) #endif -#ifdef TARGET_WORDS_BIGENDIAN -# define helper_te_ld_name helper_be_ld_name -# define helper_te_st_name helper_be_st_name -#else -# define helper_te_ld_name helper_le_ld_name -# define helper_te_st_name helper_le_st_name -#endif - #ifndef SOFTMMU_CODE_ACCESS static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, - CPUIOTLBEntry *iotlbentry, + size_t mmu_idx, size_t index, target_ulong addr, uintptr_t retaddr) { - uint64_t val; - CPUState *cpu = ENV_GET_CPU(env); - hwaddr physaddr = iotlbentry->addr; - MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); - - physaddr = (physaddr & TARGET_PAGE_MASK) + addr; - cpu->mem_io_pc = retaddr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { - cpu_io_recompile(cpu, retaddr); - } - - cpu->mem_io_vaddr = addr; - memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT, - iotlbentry->attrs); - return val; + CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; + return io_readx(env, iotlbentry, addr, retaddr, DATA_SIZE); } #endif @@ -167,15 +132,13 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr); + res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr); res = TGT_LE(res); return res; } @@ -236,15 +199,13 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. 
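The io_read/io_write template bodies above shrink to thin wrappers: the MMIO dispatch that used to be instantiated once per DATA_SIZE now lives in out-of-line io_readx()/io_writex(), parameterized by size. A sketch of the shared read helper, assuming it carries over the logic the template used to expand (signature inferred from the call site above):

/* Sketch of the shared helper; the body mirrors the removed template
 * code, with the compile-time 1 << SHIFT replaced by the size argument. */
uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                  target_ulong addr, uintptr_t retaddr, int size)
{
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr physaddr = iotlbentry->addr;
    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
    uint64_t val;

    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_vaddr = addr;
    memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
    return val;
}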
*/ - res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr); + res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr); res = TGT_BE(res); return res; } @@ -295,24 +256,13 @@ WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr, #endif static inline void glue(io_write, SUFFIX)(CPUArchState *env, - CPUIOTLBEntry *iotlbentry, + size_t mmu_idx, size_t index, DATA_TYPE val, target_ulong addr, uintptr_t retaddr) { - CPUState *cpu = ENV_GET_CPU(env); - hwaddr physaddr = iotlbentry->addr; - MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); - - physaddr = (physaddr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { - cpu_io_recompile(cpu, retaddr); - } - - cpu->mem_io_vaddr = addr; - cpu->mem_io_pc = retaddr; - memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT, - iotlbentry->attrs); + CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; + return io_writex(env, iotlbentry, val, addr, retaddr, DATA_SIZE); } void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, @@ -340,16 +290,14 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ val = TGT_LE(val); - glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr); + glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr); return; } @@ -418,16 +366,14 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ val = TGT_BE(val); - glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr); + glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr); return; } @@ -466,33 +412,9 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); } #endif /* DATA_SIZE > 1 */ - -#if DATA_SIZE == 1 -/* Probe for whether the specified guest write access is permitted. - * If it is not permitted then an exception will be taken in the same - * way as if this were a real write access (and we will not return). - * Otherwise the function will return, and there will be a valid - * entry in the TLB for this access. 
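The probe_write() deleted just below was guarded by DATA_SIZE == 1 precisely because its body is size-independent; with the template shrinking, a single copy can live with the rest of the TLB code in cputlb.c. A sketch, assuming the body moves essentially unchanged (the victim-TLB check is spelled as in the removed, macroized code):

/* Probe for whether the specified guest write access is permitted;
 * faults if not, otherwise guarantees a valid TLB entry.  Sketch of
 * the relocated, size-independent version. */
void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
                 uintptr_t retaddr)
{
    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;

    if ((addr & TARGET_PAGE_MASK)
        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
        /* TLB entry covers a different page: fill it for this store. */
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
                     mmu_idx, retaddr);
        }
    }
}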
- */ -void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, - uintptr_t retaddr) -{ - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { - /* TLB entry is for a different page */ - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); - } - } -} -#endif #endif /* !defined(SOFTMMU_CODE_ACCESS) */ #undef READ_ACCESS_TYPE -#undef SHIFT #undef DATA_TYPE #undef SUFFIX #undef LSUFFIX @@ -503,15 +425,9 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, #undef USUFFIX #undef SSUFFIX #undef BSWAP -#undef TGT_BE -#undef TGT_LE -#undef CPU_BE -#undef CPU_LE #undef helper_le_ld_name #undef helper_be_ld_name #undef helper_le_lds_name #undef helper_be_lds_name #undef helper_le_st_name #undef helper_be_st_name -#undef helper_te_ld_name -#undef helper_te_st_name diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h index dcdd0416bd..b08d1601d1 100644 --- a/target-alpha/cpu.h +++ b/target-alpha/cpu.h @@ -201,7 +201,7 @@ enum { /* MMU modes definitions */ -/* Alpha has 5 MMU modes: PALcode, kernel, executive, supervisor, and user. +/* Alpha has 5 MMU modes: PALcode, Kernel, Executive, Supervisor, and User. The Unix PALcode only exposes the kernel and user modes; presumably executive and supervisor are used by VMS. @@ -209,22 +209,18 @@ enum { there are PALmode instructions that can access data via physical mode or via an os-installed "alternate mode", which is one of the 4 above. - QEMU does not currently properly distinguish between code/data when - looking up addresses. To avoid having to address this issue, our - emulated PALcode will cheat and use the KSEG mapping for its code+data - rather than physical addresses. + That said, we're only emulating Unix PALcode, and not attempting VMS, + so we don't need to implement Executive and Supervisor. QEMU's own + PALcode cheats and uses the KSEG mapping for its code+data rather than + physical addresses. */ - Moreover, we're only emulating Unix PALcode, and not attempting VMS. - - All of which allows us to drop all but kernel and user modes. - Elide the unused MMU modes to save space. */ - -#define NB_MMU_MODES 2 +#define NB_MMU_MODES 3 #define MMU_MODE0_SUFFIX _kernel #define MMU_MODE1_SUFFIX _user #define MMU_KERNEL_IDX 0 #define MMU_USER_IDX 1 +#define MMU_PHYS_IDX 2 typedef struct CPUAlphaState CPUAlphaState; @@ -234,7 +230,6 @@ struct CPUAlphaState { uint64_t pc; uint64_t unique; uint64_t lock_addr; - uint64_t lock_st_addr; uint64_t lock_value; /* The FPCR, and disassembled portions thereof. */ @@ -350,9 +345,6 @@ enum { EXCP_ARITH, EXCP_FEN, EXCP_CALL_PAL, - /* For Usermode emulation. */ - EXCP_STL_C, - EXCP_STQ_C, }; /* Alpha-specific interrupt pending bits. */ diff --git a/target-alpha/helper.c b/target-alpha/helper.c index 85168b7ed1..2ef6cbe2a4 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -126,6 +126,14 @@ static int get_physical_address(CPUAlphaState *env, target_ulong addr, int prot = 0; int ret = MM_K_ACV; + /* Handle physical accesses. */ + if (mmu_idx == MMU_PHYS_IDX) { + phys = addr; + prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + ret = -1; + goto exit; + } + /* Ensure that the virtual address is properly sign-extended from the last implemented virtual address bit.
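The new MMU_PHYS_IDX is what lets the Alpha hw_ld/st physical-mode instructions become ordinary TCG loads and stores (see the translate.c hunks below): accesses tagged with that index reach the branch added above and translate as the identity map. A stand-alone rendering of that rule (illustrative only; the return convention follows the surrounding function, where -1 means success and MM_K_* values are faults):

/* Identity translation used for MMU_PHYS_IDX; hypothetical free-standing
 * form of the branch added to get_physical_address() above. */
static int translate_phys(uint64_t addr, uint64_t *phys, int *prot)
{
    *phys = addr;                                /* no translation */
    *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;  /* never faults */
    return -1;
}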
*/ if (saddr >> TARGET_VIRT_ADDR_SPACE_BITS != saddr >> 63) { @@ -298,12 +306,6 @@ void alpha_cpu_do_interrupt(CPUState *cs) case EXCP_CALL_PAL: name = "call_pal"; break; - case EXCP_STL_C: - name = "stl_c"; - break; - case EXCP_STQ_C: - name = "stq_c"; - break; } qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n", ++count, name, env->error_code, env->pc, env->ir[IR_SP]); diff --git a/target-alpha/helper.h b/target-alpha/helper.h index c3d8a3ee49..004221df8c 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -92,15 +92,6 @@ DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64) #if !defined (CONFIG_USER_ONLY) -DEF_HELPER_2(ldl_phys, i64, env, i64) -DEF_HELPER_2(ldq_phys, i64, env, i64) -DEF_HELPER_2(ldl_l_phys, i64, env, i64) -DEF_HELPER_2(ldq_l_phys, i64, env, i64) -DEF_HELPER_3(stl_phys, void, env, i64, i64) -DEF_HELPER_3(stq_phys, void, env, i64, i64) -DEF_HELPER_3(stl_c_phys, i64, env, i64, i64) -DEF_HELPER_3(stq_c_phys, i64, env, i64, i64) - DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_1(tb_flush, TCG_CALL_NO_RWG, void, env) diff --git a/target-alpha/machine.c b/target-alpha/machine.c index 710b7835b4..b99a123a39 100644 --- a/target-alpha/machine.c +++ b/target-alpha/machine.c @@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = { VMSTATE_UINTTL(unique, CPUAlphaState), VMSTATE_UINTTL(lock_addr, CPUAlphaState), VMSTATE_UINTTL(lock_value, CPUAlphaState), - /* Note that lock_st_addr is not saved; it is a temporary - used during the execution of the st[lq]_c insns. */ VMSTATE_UINT8(ps, CPUAlphaState), VMSTATE_UINT8(intr_flag, CPUAlphaState), diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c index 1b2be50be7..78a7d45590 100644 --- a/target-alpha/mem_helper.c +++ b/target-alpha/mem_helper.c @@ -25,79 +25,6 @@ /* Softmmu support */ #ifndef CONFIG_USER_ONLY - -uint64_t helper_ldl_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - return (int32_t)ldl_phys(cs->as, p); -} - -uint64_t helper_ldq_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - return ldq_phys(cs->as, p); -} - -uint64_t helper_ldl_l_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - env->lock_addr = p; - return env->lock_value = (int32_t)ldl_phys(cs->as, p); -} - -uint64_t helper_ldq_l_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - env->lock_addr = p; - return env->lock_value = ldq_phys(cs->as, p); -} - -void helper_stl_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - stl_phys(cs->as, p, v); -} - -void helper_stq_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - stq_phys(cs->as, p, v); -} - -uint64_t helper_stl_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - uint64_t ret = 0; - - if (p == env->lock_addr) { - int32_t old = ldl_phys(cs->as, p); - if (old == (int32_t)env->lock_value) { - stl_phys(cs->as, p, v); - ret = 1; - } - } - env->lock_addr = -1; - - return ret; -} - -uint64_t helper_stq_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - uint64_t ret = 0; - - if (p == env->lock_addr) { - uint64_t old = ldq_phys(cs->as, p); - if (old == env->lock_value) { - 
stq_phys(cs->as, p, v); - ret = 1; - } - } - env->lock_addr = -1; - - return ret; -} - void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index c27c7b9cc4..03e47765ed 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31]; static TCGv cpu_fir[31]; static TCGv cpu_pc; static TCGv cpu_lock_addr; -static TCGv cpu_lock_st_addr; static TCGv cpu_lock_value; #ifndef CONFIG_USER_ONLY @@ -116,7 +115,6 @@ void alpha_translate_init(void) static const GlobalVar vars[] = { DEF_VAR(pc), DEF_VAR(lock_addr), - DEF_VAR(lock_st_addr), DEF_VAR(lock_value), }; @@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx) return ctx->sink; } +static void free_context_temps(DisasContext *ctx) +{ + if (!TCGV_IS_UNUSED_I64(ctx->sink)) { + tcg_gen_discard_i64(ctx->sink); + tcg_temp_free(ctx->sink); + TCGV_UNUSED_I64(ctx->sink); + } + if (!TCGV_IS_UNUSED_I64(ctx->zero)) { + tcg_temp_free(ctx->zero); + TCGV_UNUSED_I64(ctx->zero); + } + if (!TCGV_IS_UNUSED_I64(ctx->lit)) { + tcg_temp_free(ctx->lit); + TCGV_UNUSED_I64(ctx->lit); + } +} + static TCGv load_gpr(DisasContext *ctx, unsigned reg) { if (likely(reg < 31)) { @@ -392,59 +407,40 @@ static inline void gen_store_mem(DisasContext *ctx, } static ExitStatus gen_store_conditional(DisasContext *ctx, int ra, int rb, - int32_t disp16, int quad) + int32_t disp16, int mem_idx, + TCGMemOp op) { - TCGv addr; - - if (ra == 31) { - /* ??? Don't bother storing anything. The user can't tell - the difference, since the zero register always reads zero. */ - return NO_EXIT; - } - -#if defined(CONFIG_USER_ONLY) - addr = cpu_lock_st_addr; -#else - addr = tcg_temp_local_new(); -#endif + TCGLabel *lab_fail, *lab_done; + TCGv addr, val; + addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, load_gpr(ctx, rb), disp16); + free_context_temps(ctx); -#if defined(CONFIG_USER_ONLY) - /* ??? This is handled via a complicated version of compare-and-swap - in the cpu_loop. Hopefully one day we'll have a real CAS opcode - in TCG so that this isn't necessary. */ - return gen_excp(ctx, quad ? EXCP_STQ_C : EXCP_STL_C, ra); -#else - /* ??? In system mode we are never multi-threaded, so CAS can be - implemented via a non-atomic load-compare-store sequence. */ - { - TCGLabel *lab_fail, *lab_done; - TCGv val; - - lab_fail = gen_new_label(); - lab_done = gen_new_label(); - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); + lab_fail = gen_new_label(); + lab_done = gen_new_label(); + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); + tcg_temp_free_i64(addr); - val = tcg_temp_new(); - tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, quad ? MO_LEQ : MO_LESL); - tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail); + val = tcg_temp_new_i64(); + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, + load_gpr(ctx, ra), mem_idx, op); + free_context_temps(ctx); - tcg_gen_qemu_st_i64(ctx->ir[ra], addr, ctx->mem_idx, - quad ? 
MO_LEQ : MO_LEUL); - tcg_gen_movi_i64(ctx->ir[ra], 1); - tcg_gen_br(lab_done); + if (ra != 31) { + tcg_gen_setcond_i64(TCG_COND_EQ, ctx->ir[ra], val, cpu_lock_value); + } + tcg_temp_free_i64(val); + tcg_gen_br(lab_done); - gen_set_label(lab_fail); + gen_set_label(lab_fail); + if (ra != 31) { tcg_gen_movi_i64(ctx->ir[ra], 0); - - gen_set_label(lab_done); - tcg_gen_movi_i64(cpu_lock_addr, -1); - - tcg_temp_free(addr); - return NO_EXIT; } -#endif + + gen_set_label(lab_done); + tcg_gen_movi_i64(cpu_lock_addr, -1); + return NO_EXIT; } static bool in_superpage(DisasContext *ctx, int64_t addr) @@ -2423,19 +2419,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0xF) { case 0x0: /* Longword physical access (hw_ldl/p) */ - gen_helper_ldl_phys(va, cpu_env, addr); + tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL); break; case 0x1: /* Quadword physical access (hw_ldq/p) */ - gen_helper_ldq_phys(va, cpu_env, addr); + tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEQ); break; case 0x2: /* Longword physical access with lock (hw_ldl_l/p) */ - gen_helper_ldl_l_phys(va, cpu_env, addr); + gen_qemu_ldl_l(va, addr, MMU_PHYS_IDX); break; case 0x3: /* Quadword physical access with lock (hw_ldq_l/p) */ - gen_helper_ldq_l_phys(va, cpu_env, addr); + gen_qemu_ldq_l(va, addr, MMU_PHYS_IDX); break; case 0x4: /* Longword virtual PTE fetch (hw_ldl/v) */ @@ -2674,27 +2670,34 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) #ifndef CONFIG_USER_ONLY REQUIRE_TB_FLAG(TB_FLAGS_PAL_MODE); { - TCGv addr = tcg_temp_new(); - va = load_gpr(ctx, ra); - vb = load_gpr(ctx, rb); - - tcg_gen_addi_i64(addr, vb, disp12); switch ((insn >> 12) & 0xF) { case 0x0: /* Longword physical access */ - gen_helper_stl_phys(cpu_env, addr, va); + va = load_gpr(ctx, ra); + vb = load_gpr(ctx, rb); + tmp = tcg_temp_new(); + tcg_gen_addi_i64(tmp, vb, disp12); + tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LESL); + tcg_temp_free(tmp); break; case 0x1: /* Quadword physical access */ - gen_helper_stq_phys(cpu_env, addr, va); + va = load_gpr(ctx, ra); + vb = load_gpr(ctx, rb); + tmp = tcg_temp_new(); + tcg_gen_addi_i64(tmp, vb, disp12); + tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LEQ); + tcg_temp_free(tmp); break; case 0x2: /* Longword physical access with lock */ - gen_helper_stl_c_phys(dest_gpr(ctx, ra), cpu_env, addr, va); + ret = gen_store_conditional(ctx, ra, rb, disp12, + MMU_PHYS_IDX, MO_LESL); break; case 0x3: /* Quadword physical access with lock */ - gen_helper_stq_c_phys(dest_gpr(ctx, ra), cpu_env, addr, va); + ret = gen_store_conditional(ctx, ra, rb, disp12, + MMU_PHYS_IDX, MO_LEQ); break; case 0x4: /* Longword virtual access */ @@ -2733,7 +2736,6 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) /* Invalid */ goto invalid_opc; } - tcg_temp_free(addr); break; } #else @@ -2797,11 +2799,13 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) break; case 0x2E: /* STL_C */ - ret = gen_store_conditional(ctx, ra, rb, disp16, 0); + ret = gen_store_conditional(ctx, ra, rb, disp16, + ctx->mem_idx, MO_LESL); break; case 0x2F: /* STQ_C */ - ret = gen_store_conditional(ctx, ra, rb, disp16, 1); + ret = gen_store_conditional(ctx, ra, rb, disp16, + ctx->mem_idx, MO_LEQ); break; case 0x30: /* BR */ @@ -2906,6 +2910,10 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) /* Similarly for flush-to-zero. 
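The rewritten gen_store_conditional() above is worth restating in plain C: the emitted code checks the monitor address by hand, then folds the value comparison and the store into one cmpxchg, so the sequence stays correct even with other vCPUs running. A sketch of the run-time semantics (mem is the host view of the locked guest address; atomic_cmpxchg is QEMU's macro from qemu/atomic.h):

/* Run-time effect of the emitted stq_c sequence (sketch). */
uint64_t stq_c_semantics(CPUAlphaState *env, uint64_t *mem,
                         uint64_t addr, uint64_t newv)
{
    uint64_t ok = 0;
    if (addr == env->lock_addr) {
        /* Value check and store happen in one atomic step. */
        uint64_t old = atomic_cmpxchg(mem, env->lock_value, newv);
        ok = (old == env->lock_value);
    }
    env->lock_addr = -1;    /* the monitor is always cleared */
    return ok;              /* written to ra unless ra == 31 */
}

Note the result convention: Alpha's st_c writes 1 on success, which is why the generated code uses TCG_COND_EQ where the ARM rewrite below uses TCG_COND_NE.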
*/ ctx.tb_ftz = -1; + TCGV_UNUSED_I64(ctx.zero); + TCGV_UNUSED_I64(ctx.sink); + TCGV_UNUSED_I64(ctx.lit); + num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; if (max_insns == 0) { @@ -2940,23 +2948,9 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) } insn = cpu_ldl_code(env, ctx.pc); - TCGV_UNUSED_I64(ctx.zero); - TCGV_UNUSED_I64(ctx.sink); - TCGV_UNUSED_I64(ctx.lit); - ctx.pc += 4; ret = translate_one(ctxp, insn); - - if (!TCGV_IS_UNUSED_I64(ctx.sink)) { - tcg_gen_discard_i64(ctx.sink); - tcg_temp_free(ctx.sink); - } - if (!TCGV_IS_UNUSED_I64(ctx.zero)) { - tcg_temp_free(ctx.zero); - } - if (!TCGV_IS_UNUSED_I64(ctx.lit)) { - tcg_temp_free(ctx.lit); - } + free_context_temps(ctxp); /* If we reach a page boundary, are single stepping, or exhaust instruction count, stop generation. */ diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 9d75227e04..19d967b69e 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -46,7 +46,6 @@ #define EXCP_BKPT 7 #define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */ #define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. */ -#define EXCP_STREX 10 #define EXCP_HVC 11 /* HyperVisor Call */ #define EXCP_HYP_TRAP 12 #define EXCP_SMC 13 /* Secure Monitor Call */ @@ -475,10 +474,6 @@ typedef struct CPUARMState { uint64_t exclusive_addr; uint64_t exclusive_val; uint64_t exclusive_high; -#if defined(CONFIG_USER_ONLY) - uint64_t exclusive_test; - uint32_t exclusive_info; -#endif /* iwMMXt coprocessor state. */ struct { diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 41e48a41b4..98b97df461 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -27,6 +27,10 @@ #include "qemu/bitops.h" #include "internals.h" #include "qemu/crc32c.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" #include <zlib.h> /* For crc32 */ /* C2.4.7 Multiply and divide */ @@ -444,3 +448,112 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +/* Returns 0 on success; 1 otherwise. */ +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. 
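When the host cannot do a 16-byte cmpxchg (no CONFIG_ATOMIC128) and other vCPUs are running, the helper above punts with cpu_loop_exit_atomic(), raising the EXCP_ATOMIC that the cpu_loop hunks at the top of this section now catch. Handling it amounts to executing the offending instruction while everything else is stopped; a rough sketch of that step (the real cpu_exec_step_atomic() lives in QEMU's cpu-exec machinery and is more careful, so treat the details here as assumptions):

/* Rough shape of the EXCP_ATOMIC slow path (sketch only). */
static void step_atomic_sketch(CPUState *cpu)
{
    start_exclusive();       /* bring all other vCPUs to a halt */
    parallel_cpus = false;   /* retranslate without atomic helpers */
    /* ... execute exactly one guest instruction here ... */
    parallel_cpus = true;
    end_exclusive();         /* let the other vCPUs resume */
}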
*/ + uint64_t *haddr = g2h(addr); + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); + } +#endif + } + + return !success; +} + +uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + } + + return !success; +} diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index 1d3d10fffb..dd32000e63 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -46,3 +46,5 @@ DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64) diff --git a/target-arm/internals.h b/target-arm/internals.h index cd574017c6..3edccd2529 100644 --- a/target-arm/internals.h +++ b/target-arm/internals.h @@ -46,8 +46,7 @@ static inline bool excp_is_internal(int excp) || excp == EXCP_HALTED || excp == EXCP_EXCEPTION_EXIT || excp == EXCP_KERNEL_TRAP - || excp == EXCP_SEMIHOST - || excp == EXCP_STREX; + || excp == EXCP_SEMIHOST; } /* Exception names for debug logging; note that not all of these @@ -63,7 +62,6 @@ static const char * const excnames[] = { [EXCP_BKPT] = "Breakpoint", [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit", [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage", - 
[EXCP_STREX] = "QEMU intercept of STREX", [EXCP_HVC] = "Hypervisor Call", [EXCP_HYP_TRAP] = "Hypervisor Trap", [EXCP_SMC] = "Secure Monitor Call", diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 96c222722e..ded924a0a9 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -1839,37 +1839,41 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) } } -/* - * Load/Store exclusive instructions are implemented by remembering - * the value/address loaded, and seeing if these are the same - * when the store is performed. This is not actually the architecturally - * mandated semantics, but it works for typical guest code sequences - * and avoids having to monitor regular stores. - * - * In system emulation mode only one CPU will be running at once, so - * this sequence is effectively atomic. In user emulation mode we - * throw an exception and handle the atomic operation elsewhere. - */ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, int size, bool is_pair) { TCGv_i64 tmp = tcg_temp_new_i64(); - TCGMemOp memop = s->be_data + size; + TCGMemOp be = s->be_data; g_assert(size <= 3); - tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop); - if (is_pair) { - TCGv_i64 addr2 = tcg_temp_new_i64(); TCGv_i64 hitmp = tcg_temp_new_i64(); - g_assert(size >= 2); - tcg_gen_addi_i64(addr2, addr, 1 << size); - tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop); - tcg_temp_free_i64(addr2); + if (size == 3) { + TCGv_i64 addr2 = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), + MO_64 | MO_ALIGN_16 | be); + tcg_gen_addi_i64(addr2, addr, 8); + tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), + MO_64 | MO_ALIGN | be); + tcg_temp_free_i64(addr2); + } else { + g_assert(size == 2); + tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), + MO_64 | MO_ALIGN | be); + if (be == MO_LE) { + tcg_gen_extr32_i64(tmp, hitmp, tmp); + } else { + tcg_gen_extr32_i64(hitmp, tmp, tmp); + } + } + tcg_gen_mov_i64(cpu_exclusive_high, hitmp); tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp); tcg_temp_free_i64(hitmp); + } else { + tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), size | MO_ALIGN | be); } tcg_gen_mov_i64(cpu_exclusive_val, tmp); @@ -1879,16 +1883,6 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_mov_i64(cpu_exclusive_addr, addr); } -#ifdef CONFIG_USER_ONLY -static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i64 addr, int size, int is_pair) -{ - tcg_gen_mov_i64(cpu_exclusive_test, addr); - tcg_gen_movi_i32(cpu_exclusive_info, - size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14)); - gen_exception_internal_insn(s, 4, EXCP_STREX); -} -#else static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i64 inaddr, int size, int is_pair) { @@ -1916,46 +1910,42 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size); - tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label); - tcg_temp_free_i64(tmp); - - if (is_pair) { - TCGv_i64 addrhi = tcg_temp_new_i64(); - TCGv_i64 tmphi = tcg_temp_new_i64(); - - tcg_gen_addi_i64(addrhi, addr, 1 << size); - tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), - s->be_data + size); - tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label); - - tcg_temp_free_i64(tmphi); - tcg_temp_free_i64(addrhi); - 
} - - /* We seem to still have the exclusive monitor, so do the store */ - tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), - s->be_data + size); if (is_pair) { - TCGv_i64 addrhi = tcg_temp_new_i64(); - - tcg_gen_addi_i64(addrhi, addr, 1 << size); - tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi, - get_mem_index(s), s->be_data + size); - tcg_temp_free_i64(addrhi); + if (size == 2) { + TCGv_i64 val = tcg_temp_new_i64(); + tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high); + tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp, + get_mem_index(s), + size | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val); + tcg_temp_free_i64(val); + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt), + cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt), + cpu_reg(s, rt2)); + } + } else { + TCGv_i64 val = cpu_reg(s, rt); + tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val, + get_mem_index(s), + size | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); } tcg_temp_free_i64(addr); - tcg_gen_movi_i64(cpu_reg(s, rd), 0); + tcg_gen_mov_i64(cpu_reg(s, rd), tmp); + tcg_temp_free_i64(tmp); tcg_gen_br(done_label); + gen_set_label(fail_label); tcg_gen_movi_i64(cpu_reg(s, rd), 1); gen_set_label(done_label); tcg_gen_movi_i64(cpu_exclusive_addr, -1); - } -#endif /* Update the Sixty-Four bit (SF) registersize. This logic is derived * from the ARMv8 specs for LDR (Shared decode for all encodings). diff --git a/target-arm/translate.c b/target-arm/translate.c index ef62f8b0c4..718f7d06f3 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -65,10 +65,6 @@ static TCGv_i32 cpu_R[16]; TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF; TCGv_i64 cpu_exclusive_addr; TCGv_i64 cpu_exclusive_val; -#ifdef CONFIG_USER_ONLY -TCGv_i64 cpu_exclusive_test; -TCGv_i32 cpu_exclusive_info; -#endif /* FIXME: These should be removed. */ static TCGv_i32 cpu_F0s, cpu_F1s; @@ -102,12 +98,6 @@ void arm_translate_init(void) offsetof(CPUARMState, exclusive_addr), "exclusive_addr"); cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env, offsetof(CPUARMState, exclusive_val), "exclusive_val"); -#ifdef CONFIG_USER_ONLY - cpu_exclusive_test = tcg_global_mem_new_i64(cpu_env, - offsetof(CPUARMState, exclusive_test), "exclusive_test"); - cpu_exclusive_info = tcg_global_mem_new_i32(cpu_env, - offsetof(CPUARMState, exclusive_info), "exclusive_info"); -#endif a64_translate_init(); } @@ -932,145 +922,103 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) * These functions work like tcg_gen_qemu_{ld,st}* except * that the address argument is TCGv_i32 rather than TCGv. */ -#if TARGET_LONG_BITS == 32 - -#define DO_GEN_LD(SUFF, OPC, BE32_XOR) \ -static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ -{ \ - TCGMemOp opc = (OPC) | s->be_data; \ - /* Not needed for user-mode BE32, where we use MO_BE instead. 
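In the AArch64 store-exclusive rewrite above, a pair of 32-bit registers (size == 2) is handled by concatenating Rt:Rt2 so that a single 64-bit cmpxchg covers the whole pair; only the 64-bit pair case needs the out-of-line paired_cmpxchg64 helpers. A C rendering of the 32-bit-pair path, little-endian layout, with mem standing in for the host mapping of the guest address (the manual address check against exclusive_addr is elided):

/* STXP of two 32-bit registers as a single 64-bit cmpxchg (sketch). */
uint32_t stxp32_semantics(CPUARMState *env, uint64_t *mem,
                          uint32_t rt, uint32_t rt2)
{
    uint64_t cmpv = (uint32_t)env->exclusive_val
                    | ((uint64_t)(uint32_t)env->exclusive_high << 32);
    uint64_t newv = rt | ((uint64_t)rt2 << 32);
    uint64_t old = atomic_cmpxchg(mem, cmpv, newv);
    return old != cmpv;     /* 0 = success, written to Rd */
}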
*/ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - TCGv addr_be = tcg_temp_new(); \ - tcg_gen_xori_i32(addr_be, addr, BE32_XOR); \ - tcg_gen_qemu_ld_i32(val, addr_be, index, opc); \ - tcg_temp_free(addr_be); \ - return; \ - } \ - tcg_gen_qemu_ld_i32(val, addr, index, opc); \ -} - -#define DO_GEN_ST(SUFF, OPC, BE32_XOR) \ -static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ -{ \ - TCGMemOp opc = (OPC) | s->be_data; \ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - TCGv addr_be = tcg_temp_new(); \ - tcg_gen_xori_i32(addr_be, addr, BE32_XOR); \ - tcg_gen_qemu_st_i32(val, addr_be, index, opc); \ - tcg_temp_free(addr_be); \ - return; \ - } \ - tcg_gen_qemu_st_i32(val, addr, index, opc); \ -} -static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op) { - TCGMemOp opc = MO_Q | s->be_data; - tcg_gen_qemu_ld_i64(val, addr, index, opc); + TCGv addr = tcg_temp_new(); + tcg_gen_extu_i32_tl(addr, a32); + /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { - tcg_gen_rotri_i64(val, val, 32); + if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) { + tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE))); } + return addr; } -static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, + int index, TCGMemOp opc) { - TCGMemOp opc = MO_Q | s->be_data; - /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_rotri_i64(tmp, val, 32); - tcg_gen_qemu_st_i64(tmp, addr, index, opc); - tcg_temp_free_i64(tmp); - return; - } - tcg_gen_qemu_st_i64(val, addr, index, opc); + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_ld_i32(val, addr, index, opc); + tcg_temp_free(addr); } -#else +static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, + int index, TCGMemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_st_i32(val, addr, index, opc); + tcg_temp_free(addr); +} -#define DO_GEN_LD(SUFF, OPC, BE32_XOR) \ +#define DO_GEN_LD(SUFF, OPC) \ static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ + TCGv_i32 a32, int index) \ { \ - TCGMemOp opc = (OPC) | s->be_data; \ - TCGv addr64 = tcg_temp_new(); \ - tcg_gen_extu_i32_i64(addr64, addr); \ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - tcg_gen_xori_i64(addr64, addr64, BE32_XOR); \ - } \ - tcg_gen_qemu_ld_i32(val, addr64, index, opc); \ - tcg_temp_free(addr64); \ -} - -#define DO_GEN_ST(SUFF, OPC, BE32_XOR) \ + gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \ +} + +#define DO_GEN_ST(SUFF, OPC) \ static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ + TCGv_i32 a32, int index) \ { \ - TCGMemOp opc = (OPC) | s->be_data; \ - TCGv addr64 = tcg_temp_new(); \ - tcg_gen_extu_i32_i64(addr64, addr); \ - /* Not needed for user-mode BE32, where we use MO_BE instead. 
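The xori in gen_aa32_addr() above is the whole of the BE32 story for sub-word accesses: on a big-endian-32 bus the byte lanes within an aligned 32-bit word are reversed, and that reversal is exactly an XOR on the low address bits. A worked example:

/* BE32 byte-lane adjustment, as done by gen_aa32_addr() (sketch). */
static uint32_t be32_adjust(uint32_t addr, int size_bytes)
{
    /* size 1: addr ^ 3 (byte 0 <-> 3, 1 <-> 2)
     * size 2: addr ^ 2 (halfword 0 <-> 1)
     * size 4: unchanged */
    return size_bytes < 4 ? addr ^ (4 - size_bytes) : addr;
}

For 64-bit accesses the same effect is a swap of the two words, which gen_aa32_frob64() below implements as a 32-bit rotate of the loaded value rather than an address fixup.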
*/ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - tcg_gen_xori_i64(addr64, addr64, BE32_XOR); \ - } \ - tcg_gen_qemu_st_i32(val, addr64, index, opc); \ - tcg_temp_free(addr64); \ + gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \ } -static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val) { - TCGMemOp opc = MO_Q | s->be_data; - TCGv addr64 = tcg_temp_new(); - tcg_gen_extu_i32_i64(addr64, addr); - tcg_gen_qemu_ld_i64(val, addr64, index, opc); - /* Not needed for user-mode BE32, where we use MO_BE instead. */ if (!IS_USER_ONLY && s->sctlr_b) { tcg_gen_rotri_i64(val, val, 32); } - tcg_temp_free(addr64); } -static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, TCGMemOp opc) { - TCGMemOp opc = MO_Q | s->be_data; - TCGv addr64 = tcg_temp_new(); - tcg_gen_extu_i32_i64(addr64, addr); + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_ld_i64(val, addr, index, opc); + gen_aa32_frob64(s, val); + tcg_temp_free(addr); +} + +static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index) +{ + gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data); +} + +static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, TCGMemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); /* Not needed for user-mode BE32, where we use MO_BE instead. */ if (!IS_USER_ONLY && s->sctlr_b) { - TCGv tmp = tcg_temp_new(); + TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_rotri_i64(tmp, val, 32); - tcg_gen_qemu_st_i64(tmp, addr64, index, opc); - tcg_temp_free(tmp); + tcg_gen_qemu_st_i64(tmp, addr, index, opc); + tcg_temp_free_i64(tmp); } else { - tcg_gen_qemu_st_i64(val, addr64, index, opc); + tcg_gen_qemu_st_i64(val, addr, index, opc); } - tcg_temp_free(addr64); + tcg_temp_free(addr); } -#endif +static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index) +{ + gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data); +} -DO_GEN_LD(8s, MO_SB, 3) -DO_GEN_LD(8u, MO_UB, 3) -DO_GEN_LD(16s, MO_SW, 2) -DO_GEN_LD(16u, MO_UW, 2) -DO_GEN_LD(32u, MO_UL, 0) -/* 'a' variants include an alignment check */ -DO_GEN_LD(16ua, MO_UW | MO_ALIGN, 2) -DO_GEN_LD(32ua, MO_UL | MO_ALIGN, 0) -DO_GEN_ST(8, MO_UB, 3) -DO_GEN_ST(16, MO_UW, 2) -DO_GEN_ST(32, MO_UL, 0) +DO_GEN_LD(8s, MO_SB) +DO_GEN_LD(8u, MO_UB) +DO_GEN_LD(16s, MO_SW) +DO_GEN_LD(16u, MO_UW) +DO_GEN_LD(32u, MO_UL) +DO_GEN_ST(8, MO_UB) +DO_GEN_ST(16, MO_UW) +DO_GEN_ST(32, MO_UL) static inline void gen_set_pc_im(DisasContext *s, target_ulong val) { @@ -7759,45 +7707,30 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi) /* Load/Store exclusive instructions are implemented by remembering the value/address loaded, and seeing if these are the same - when the store is performed. This should be sufficient to implement + when the store is performed. This should be sufficient to implement the architecturally mandated semantics, and avoids having to monitor - regular stores. - - In system emulation mode only one CPU will be running at once, so - this sequence is effectively atomic. In user emulation mode we - throw an exception and handle the atomic operation elsewhere. */ + regular stores. The compare vs the remembered value is done during + the cmpxchg operation, but we must compare the addresses manually. 
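The comment above states the division of labor for the 32-bit gen_store_exclusive() that follows: the address is compared against the monitor manually, while the old-value comparison rides inside the cmpxchg itself. In C terms (sketch; mem is the host view of the guest address, and note that Rd is 0 on success here, the inverse of the Alpha convention):

/* Run-time effect of the emitted 32-bit STREX sequence (sketch). */
uint32_t strex_semantics(CPUARMState *env, uint32_t *mem,
                         uint32_t addr, uint32_t newv)
{
    uint32_t rd = 1;                      /* 1 = fail */
    if (addr == (uint32_t)env->exclusive_addr) {
        uint32_t expect = (uint32_t)env->exclusive_val;
        uint32_t old = atomic_cmpxchg(mem, expect, newv);
        rd = (old != expect);
    }
    env->exclusive_addr = -1;             /* monitor cleared either way */
    return rd;
}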
*/ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i32 addr, int size) { TCGv_i32 tmp = tcg_temp_new_i32(); + TCGMemOp opc = size | MO_ALIGN | s->be_data; s->is_ldex = true; - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16ua(s, tmp, addr, get_mem_index(s)); - break; - case 2: - case 3: - gen_aa32_ld32ua(s, tmp, addr, get_mem_index(s)); - break; - default: - abort(); - } - if (size == 3) { TCGv_i32 tmp2 = tcg_temp_new_i32(); - TCGv_i32 tmp3 = tcg_temp_new_i32(); + TCGv_i64 t64 = tcg_temp_new_i64(); - tcg_gen_addi_i32(tmp2, addr, 4); - gen_aa32_ld32u(s, tmp3, tmp2, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3); - store_reg(s, rt2, tmp3); + gen_aa32_ld_i64(s, t64, addr, get_mem_index(s), opc); + tcg_gen_mov_i64(cpu_exclusive_val, t64); + tcg_gen_extr_i64_i32(tmp, tmp2, t64); + tcg_temp_free_i64(t64); + + store_reg(s, rt2, tmp2); } else { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc); tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp); } @@ -7810,23 +7743,15 @@ static void gen_clrex(DisasContext *s) tcg_gen_movi_i64(cpu_exclusive_addr, -1); } -#ifdef CONFIG_USER_ONLY -static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i32 addr, int size) -{ - tcg_gen_extu_i32_i64(cpu_exclusive_test, addr); - tcg_gen_movi_i32(cpu_exclusive_info, - size | (rd << 4) | (rt << 8) | (rt2 << 12)); - gen_exception_internal_insn(s, 4, EXCP_STREX); -} -#else static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i32 addr, int size) { - TCGv_i32 tmp; - TCGv_i64 val64, extaddr; + TCGv_i32 t0, t1, t2; + TCGv_i64 extaddr; + TCGv taddr; TCGLabel *done_label; TCGLabel *fail_label; + TCGMemOp opc = size | MO_ALIGN | s->be_data; /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) { [addr] = {Rt}; @@ -7841,69 +7766,45 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label); tcg_temp_free_i64(extaddr); - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - case 3: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: - abort(); - } - - val64 = tcg_temp_new_i64(); + taddr = gen_aa32_addr(s, addr, opc); + t0 = tcg_temp_new_i32(); + t1 = load_reg(s, rt); if (size == 3) { - TCGv_i32 tmp2 = tcg_temp_new_i32(); - TCGv_i32 tmp3 = tcg_temp_new_i32(); - tcg_gen_addi_i32(tmp2, addr, 4); - gen_aa32_ld32u(s, tmp3, tmp2, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_concat_i32_i64(val64, tmp, tmp3); - tcg_temp_free_i32(tmp3); - } else { - tcg_gen_extu_i32_i64(val64, tmp); - } - tcg_temp_free_i32(tmp); + TCGv_i64 o64 = tcg_temp_new_i64(); + TCGv_i64 n64 = tcg_temp_new_i64(); - tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label); - tcg_temp_free_i64(val64); + t2 = load_reg(s, rt2); + tcg_gen_concat_i32_i64(n64, t1, t2); + tcg_temp_free_i32(t2); + gen_aa32_frob64(s, n64); - tmp = load_reg(s, rt); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - case 3: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - default: - abort(); - } - tcg_temp_free_i32(tmp); - if (size == 3) { - tcg_gen_addi_i32(addr, addr, 4); - tmp = load_reg(s, rt2); - 
gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); + tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64, + get_mem_index(s), opc); + tcg_temp_free_i64(n64); + + gen_aa32_frob64(s, o64); + tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val); + tcg_gen_extrl_i64_i32(t0, o64); + + tcg_temp_free_i64(o64); + } else { + t2 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val); + tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc); + tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2); + tcg_temp_free_i32(t2); } - tcg_gen_movi_i32(cpu_R[rd], 0); + tcg_temp_free_i32(t1); + tcg_temp_free(taddr); + tcg_gen_mov_i32(cpu_R[rd], t0); + tcg_temp_free_i32(t0); tcg_gen_br(done_label); + gen_set_label(fail_label); tcg_gen_movi_i32(cpu_R[rd], 1); gen_set_label(done_label); tcg_gen_movi_i64(cpu_exclusive_addr, -1); } -#endif /* gen_srs: * @env: CPUARMState @@ -8878,25 +8779,27 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } tcg_temp_free_i32(addr); } else { + TCGv taddr; + TCGMemOp opc = s->be_data; + /* SWP instruction */ rm = (insn) & 0xf; - /* ??? This is not really atomic. However we know - we never have multiple CPUs running in parallel, - so it is good enough. */ - addr = load_reg(s, rn); - tmp = load_reg(s, rm); - tmp2 = tcg_temp_new_i32(); if (insn & (1 << 22)) { - gen_aa32_ld8u(s, tmp2, addr, get_mem_index(s)); - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); + opc |= MO_UB; } else { - gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s)); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + opc |= MO_UL | MO_ALIGN; } - tcg_temp_free_i32(tmp); + + addr = load_reg(s, rn); + taddr = gen_aa32_addr(s, addr, opc); tcg_temp_free_i32(addr); - store_reg(s, rd, tmp2); + + tmp = load_reg(s, rm); + tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, + get_mem_index(s), opc); + tcg_temp_free(taddr); + store_reg(s, rd, tmp); } } } else { diff --git a/target-arm/translate.h b/target-arm/translate.h index a53f25a407..285e96f087 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -79,10 +79,6 @@ extern TCGv_env cpu_env; extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF; extern TCGv_i64 cpu_exclusive_addr; extern TCGv_i64 cpu_exclusive_val; -#ifdef CONFIG_USER_ONLY -extern TCGv_i64 cpu_exclusive_test; -extern TCGv_i32 cpu_exclusive_info; -#endif static inline int arm_dc_feature(DisasContext *dc, int feature) { diff --git a/target-i386/helper.h b/target-i386/helper.h index 1320edc016..4e859eba9d 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -1,8 +1,6 @@ DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) -DEF_HELPER_0(lock, void) -DEF_HELPER_0(unlock, void) DEF_HELPER_3(write_eflags, void, env, tl, i32) DEF_HELPER_1(read_eflags, tl, env) DEF_HELPER_2(divb_AL, void, env, tl) @@ -74,8 +72,10 @@ DEF_HELPER_3(boundw, void, env, tl, int) DEF_HELPER_3(boundl, void, env, tl, int) DEF_HELPER_1(rsm, void, env) DEF_HELPER_2(into, void, env, int) +DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg8b, void, env, tl) #ifdef TARGET_X86_64 +DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg16b, void, env, tl) #endif DEF_HELPER_1(single_step, void, env) diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c index 5bc0594dfa..70f67668ab 100644 --- a/target-i386/mem_helper.c +++ b/target-i386/mem_helper.c @@ -22,87 +22,146 @@ #include "exec/helper-proto.h" #include "exec/exec-all.h" #include 
"exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" -/* broken thread support */ - -#if defined(CONFIG_USER_ONLY) -QemuMutex global_cpu_lock; - -void helper_lock(void) +void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) { - qemu_mutex_lock(&global_cpu_lock); -} + uintptr_t ra = GETPC(); + uint64_t oldv, cmpv, newv; + int eflags; -void helper_unlock(void) -{ - qemu_mutex_unlock(&global_cpu_lock); -} + eflags = cpu_cc_compute_all(env, CC_OP); -void helper_lock_init(void) -{ - qemu_mutex_init(&global_cpu_lock); -} -#else -void helper_lock(void) -{ -} + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); -void helper_unlock(void) -{ -} + oldv = cpu_ldq_data_ra(env, a0, ra); + newv = (cmpv == oldv ? newv : oldv); + /* always do the store */ + cpu_stq_data_ra(env, a0, newv, ra); -void helper_lock_init(void) -{ + if (oldv == cmpv) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); + eflags &= ~CC_Z; + } + CC_SRC = eflags; } -#endif void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) { - uint64_t d; +#ifdef CONFIG_ATOMIC64 + uint64_t oldv, cmpv, newv; int eflags; eflags = cpu_cc_compute_all(env, CC_OP); - d = cpu_ldq_data_ra(env, a0, GETPC()); - if (d == (((uint64_t)env->regs[R_EDX] << 32) | (uint32_t)env->regs[R_EAX])) { - cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32) - | (uint32_t)env->regs[R_EBX], GETPC()); + + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); + +#ifdef CONFIG_USER_ONLY + { + uint64_t *haddr = g2h(a0); + cmpv = cpu_to_le64(cmpv); + newv = cpu_to_le64(newv); + oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv); + oldv = le64_to_cpu(oldv); + } +#else + { + uintptr_t ra = GETPC(); + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx); + oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); + } +#endif + + if (oldv == cmpv) { eflags |= CC_Z; } else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d, GETPC()); - env->regs[R_EDX] = (uint32_t)(d >> 32); - env->regs[R_EAX] = (uint32_t)d; + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); eflags &= ~CC_Z; } CC_SRC = eflags; +#else + cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); +#endif /* CONFIG_ATOMIC64 */ } #ifdef TARGET_X86_64 -void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) +void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0) { - uint64_t d0, d1; + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + uint64_t o0, o1; int eflags; + bool success; if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, GETPC()); } eflags = cpu_cc_compute_all(env, CC_OP); - d0 = cpu_ldq_data_ra(env, a0, GETPC()); - d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC()); - if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) { - cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC()); - cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC()); + + cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + o0 = cpu_ldq_data_ra(env, a0 + 0, ra); + o1 = cpu_ldq_data_ra(env, a0 + 8, ra); + + oldv = int128_make128(o0, o1); + success = int128_eq(oldv, cmpv); + if (!success) { + newv = oldv; + } + + cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra); + cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra); + + if (success) { eflags |= CC_Z; 
} else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d0, GETPC()); - cpu_stq_data_ra(env, a0 + 8, d1, GETPC()); - env->regs[R_EDX] = d1; - env->regs[R_EAX] = d0; + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); eflags &= ~CC_Z; } CC_SRC = eflags; } + +void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) +{ + uintptr_t ra = GETPC(); + + if ((a0 & 0xf) != 0) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } else { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int eflags = cpu_cc_compute_all(env, CC_OP); + + Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + Int128 oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, + newv, oi, ra); + + if (int128_eq(oldv, cmpv)) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); + eflags &= ~CC_Z; + } + CC_SRC = eflags; +#endif + } +} #endif void helper_boundw(CPUX86State *env, target_ulong a0, int v) diff --git a/target-i386/translate.c b/target-i386/translate.c index 23fde58547..927b366534 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1257,55 +1257,95 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) { if (d != OR_TMP0) { gen_op_mov_v_reg(ot, cpu_T0, d); - } else { + } else if (!(s1->prefix & PREFIX_LOCK)) { gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); } switch(op) { case OP_ADCL: gen_compute_eflags_c(s1, cpu_tmp4); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1); + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update3_cc(cpu_tmp4); set_cc_op(s1, CC_OP_ADCB + ot); break; case OP_SBBL: gen_compute_eflags_c(s1, cpu_tmp4); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4); + tcg_gen_neg_tl(cpu_T0, cpu_T0); + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update3_cc(cpu_tmp4); set_cc_op(s1, CC_OP_SBBB + ot); break; case OP_ADDL: - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update2_cc(); set_cc_op(s1, CC_OP_ADDB + ot); break; case OP_SUBL: - tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_neg_tl(cpu_T0, cpu_T1); + tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); + tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1); + } else { + tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0); + tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update2_cc(); set_cc_op(s1, 
CC_OP_SUBB + ot); break; default: case OP_ANDL: - tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update1_cc(); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_ORL: - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update1_cc(); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_XORL: - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update1_cc(); set_cc_op(s1, CC_OP_LOGICB + ot); break; @@ -1321,21 +1361,23 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) { - if (d != OR_TMP0) { - gen_op_mov_v_reg(ot, cpu_T0, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1); + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); } else { - gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); + if (d != OR_TMP0) { + gen_op_mov_v_reg(ot, cpu_T0, d); + } else { + gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); + } + tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1)); + gen_op_st_rm_T0_A0(s1, ot, d); } + gen_compute_eflags_c(s1, cpu_cc_src); - if (c > 0) { - tcg_gen_addi_tl(cpu_T0, cpu_T0, 1); - set_cc_op(s1, CC_OP_INCB + ot); - } else { - tcg_gen_addi_tl(cpu_T0, cpu_T0, -1); - set_cc_op(s1, CC_OP_DECB + ot); - } - gen_op_st_rm_T0_A0(s1, ot, d); tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot); } static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, @@ -4494,10 +4536,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, s->aflag = aflag; s->dflag = dflag; - /* lock generation */ - if (prefixes & PREFIX_LOCK) - gen_helper_lock(); - /* now check op code */ reswitch: switch(b) { @@ -4632,10 +4670,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); op = (modrm >> 3) & 7; if (mod != 3) { - if (op == 0) + if (op == 0) { s->rip_offset = insn_const_size(ot); + } gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + /* For those below that handle locked memory, don't load here. 
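One subtlety in the gen_op() conversion above: the atomic TCG set has a fetch-add but no fetch-sub, so OP_SUBL under LOCK negates the operand and adds, keeping the old memory value in cc_srcT for the flags. C rendering of the emitted sequence (sketch; atomic_fetch_add is QEMU's macro):

/* LOCK SUB as negate-and-fetch-add (sketch of the emitted semantics). */
long lock_sub_semantics(long *mem, long src, long *cc_srcT)
{
    long old = atomic_fetch_add(mem, -src);  /* *mem -= src, atomically */
    *cc_srcT = old;                          /* flags need the old value */
    return old - src;                        /* result, lands in cpu_T0 */
}

The same negate trick appears in OP_SBBL above, where the borrow is folded into the addend before the single atomic add.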
*/ + if (!(s->prefix & PREFIX_LOCK) + || op != 2) { + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + } } else { gen_op_mov_v_reg(ot, cpu_T0, rm); } @@ -4648,19 +4691,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ - tcg_gen_not_tl(cpu_T0, cpu_T0); - if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) { + goto illegal_op; + } + tcg_gen_movi_tl(cpu_T0, ~0); + tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s->mem_index, ot | MO_LE); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_not_tl(cpu_T0, cpu_T0); + if (mod != 3) { + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } else { + gen_op_mov_reg_v(ot, rm, cpu_T0); + } } break; case 3: /* neg */ - tcg_gen_neg_tl(cpu_T0, cpu_T0); - if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + if (s->prefix & PREFIX_LOCK) { + TCGLabel *label1; + TCGv a0, t0, t1, t2; + + if (mod == 3) { + goto illegal_op; + } + a0 = tcg_temp_local_new(); + t0 = tcg_temp_local_new(); + label1 = gen_new_label(); + + tcg_gen_mov_tl(a0, cpu_A0); + tcg_gen_mov_tl(t0, cpu_T0); + + gen_set_label(label1); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + tcg_gen_mov_tl(t2, t0); + tcg_gen_neg_tl(t1, t0); + tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1, + s->mem_index, ot | MO_LE); + tcg_temp_free(t1); + tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1); + + tcg_temp_free(t2); + tcg_temp_free(a0); + tcg_gen_mov_tl(cpu_T0, t0); + tcg_temp_free(t0); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_neg_tl(cpu_T0, cpu_T0); + if (mod != 3) { + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } else { + gen_op_mov_reg_v(ot, rm, cpu_T0); + } } gen_op_update_neg_cc(); set_cc_op(s, CC_OP_SUBB + ot); @@ -5048,19 +5130,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; + gen_op_mov_v_reg(ot, cpu_T0, reg); if (mod == 3) { rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, cpu_T0, reg); gen_op_mov_v_reg(ot, cpu_T1, rm); tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); gen_op_mov_reg_v(ot, reg, cpu_T1); gen_op_mov_reg_v(ot, rm, cpu_T0); } else { gen_lea_modrm(env, s, modrm); - gen_op_mov_v_reg(ot, cpu_T0, reg); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0, + s->mem_index, ot | MO_LE); + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + } else { + gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } gen_op_mov_reg_v(ot, reg, cpu_T1); } gen_op_update2_cc(); @@ -5069,57 +5156,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ { - TCGLabel *label1, *label2; - TCGv t0, t1, t2, a0; + TCGv oldv, newv, cmpv; ot = mo_b_d(b, dflag); modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - gen_op_mov_v_reg(ot, t1, reg); - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } else { + oldv = tcg_temp_new(); + newv = tcg_temp_new(); + cmpv = tcg_temp_new(); + gen_op_mov_v_reg(ot, newv, reg); + tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); + + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) { + goto illegal_op; + } gen_lea_modrm(env, s, 
modrm); - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(s, ot, t0, a0); - rm = 0; /* avoid warning */ - } - label1 = gen_new_label(); - tcg_gen_mov_tl(t2, cpu_regs[R_EAX]); - gen_extu(ot, t0); - gen_extu(ot, t2); - tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1); - label2 = gen_new_label(); - if (mod == 3) { - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_mov_reg_v(ot, rm, t1); + tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv, + s->mem_index, ot | MO_LE); + gen_op_mov_reg_v(ot, R_EAX, oldv); } else { - /* perform no-op store cycle like physical cpu; must be - before changing accumulator to ensure idempotency if - the store faults and the instruction is restarted */ - gen_op_st_v(s, ot, t0, a0); - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_st_v(s, ot, t1, a0); + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, oldv, rm); + } else { + gen_lea_modrm(env, s, modrm); + gen_op_ld_v(s, ot, oldv, cpu_A0); + rm = 0; /* avoid warning */ + } + gen_extu(ot, oldv); + gen_extu(ot, cmpv); + /* store value = (old == cmp ? new : old); */ + tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); + if (mod == 3) { + gen_op_mov_reg_v(ot, R_EAX, oldv); + gen_op_mov_reg_v(ot, rm, newv); + } else { + /* Perform an unconditional store cycle like physical cpu; + must be before changing accumulator to ensure + idempotency if the store faults and the instruction + is restarted */ + gen_op_st_v(s, ot, newv, cpu_A0); + gen_op_mov_reg_v(ot, R_EAX, oldv); + } } - gen_set_label(label2); - tcg_gen_mov_tl(cpu_cc_src, t0); - tcg_gen_mov_tl(cpu_cc_srcT, t2); - tcg_gen_sub_tl(cpu_cc_dst, t2, t0); + tcg_gen_mov_tl(cpu_cc_src, oldv); + tcg_gen_mov_tl(cpu_cc_srcT, cmpv); + tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); set_cc_op(s, CC_OP_SUBB + ot); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + tcg_temp_free(oldv); + tcg_temp_free(newv); + tcg_temp_free(cmpv); } break; case 0x1c7: /* cmpxchg8b */ @@ -5132,14 +5220,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_helper_cmpxchg16b(cpu_env, cpu_A0); + if ((s->prefix & PREFIX_LOCK) && parallel_cpus) { + gen_helper_cmpxchg16b(cpu_env, cpu_A0); + } else { + gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0); + } } else #endif { if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_helper_cmpxchg8b(cpu_env, cpu_A0); + if ((s->prefix & PREFIX_LOCK) && parallel_cpus) { + gen_helper_cmpxchg8b(cpu_env, cpu_A0); + } else { + gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0); + } } set_cc_op(s, CC_OP_EFLAGS); break; @@ -5464,12 +5560,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_lea_modrm(env, s, modrm); gen_op_mov_v_reg(ot, cpu_T0, reg); /* for xchg, lock is implicit */ - if (!(prefixes & PREFIX_LOCK)) - gen_helper_lock(); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - if (!(prefixes & PREFIX_LOCK)) - gen_helper_unlock(); + tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0, + s->mem_index, ot | MO_LE); gen_op_mov_reg_v(ot, reg, cpu_T1); } break; @@ -6555,7 +6647,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (mod != 3) { s->rip_offset = 1; gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + if (!(s->prefix & PREFIX_LOCK)) { + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + } } else { 
gen_op_mov_v_reg(ot, cpu_T0, rm); } @@ -6585,44 +6679,69 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); gen_op_mov_v_reg(MO_32, cpu_T1, reg); if (mod != 3) { - gen_lea_modrm(env, s, modrm); + AddressParts a = gen_lea_modrm_0(env, s, modrm); /* specific case: we need to add a displacement */ gen_exts(ot, cpu_T1); tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot); tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot); - tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0); + gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override); + if (!(s->prefix & PREFIX_LOCK)) { + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + } } else { gen_op_mov_v_reg(ot, cpu_T0, rm); } bt_op: tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1); - tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); - switch(op) { - case 0: - break; - case 1: - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0); - break; - case 2: - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); - tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0); - break; - default: - case 3: - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0); - break; - } - if (op != 0) { - if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_movi_tl(cpu_tmp0, 1); + tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); + if (s->prefix & PREFIX_LOCK) { + switch (op) { + case 0: /* bt */ + /* Needs no atomic ops; we suppressed the normal + memory load for LOCK above so do it now. */ + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + break; + case 1: /* bts */ + tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0, + s->mem_index, ot | MO_LE); + break; + case 2: /* btr */ + tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0, + s->mem_index, ot | MO_LE); + break; + default: + case 3: /* btc */ + tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0, + s->mem_index, ot | MO_LE); + break; + } + tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); + } else { + tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); + switch (op) { + case 0: /* bt */ + /* Data already loaded; nothing to do. */ + break; + case 1: /* bts */ + tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0); + break; + case 2: /* btr */ + tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0); + break; + default: + case 3: /* btc */ + tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0); + break; + } + if (op != 0) { + if (mod != 3) { + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } else { + gen_op_mov_reg_v(ot, rm, cpu_T0); + } } } @@ -8088,20 +8207,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, default: goto unknown_op; } - /* lock generation */ - if (s->prefix & PREFIX_LOCK) - gen_helper_unlock(); return s->pc; illegal_op: - if (s->prefix & PREFIX_LOCK) - gen_helper_unlock(); - /* XXX: ensure that no lock was generated */ gen_illegal_opcode(s); return s->pc; unknown_op: - if (s->prefix & PREFIX_LOCK) - gen_helper_unlock(); - /* XXX: ensure that no lock was generated */ gen_unknown_opcode(env, s); return s->pc; } @@ -8193,8 +8303,6 @@ void tcg_x86_init(void) offsetof(CPUX86State, bnd_regs[i].ub), bnd_regu_names[i]); } - - helper_lock_init(); } /* generate intermediate code for basic block 'tb'. 
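Taken together, the translate.c changes above retire the global lock/unlock helpers: a LOCK-prefixed read-modify-write now maps onto a single TCG atomic primitive where one exists (fetch_or for BTS, fetch_and for BTR, fetch_xor for BTC, fetch_add for XADD, xchg for XCHG), and only NEG needs the explicit retry loop seen earlier, since there is no fetch-neg primitive. A rough host-level sketch of that loop, assuming the GCC/Clang __atomic builtins (the standalone function atomic_neg32 is illustrative, not part of QEMU):

    static uint32_t atomic_neg32(uint32_t *addr)
    {
        uint32_t old = __atomic_load_n(addr, __ATOMIC_RELAXED);
        while (!__atomic_compare_exchange_n(addr, &old, -old, false,
                                            __ATOMIC_SEQ_CST,
                                            __ATOMIC_SEQ_CST))
            continue;
        return -old;
    }

Each failed compare-and-swap refreshes old with the value some other vCPU just stored, which is exactly the role of the brcond back to label1 in the generated code; on success, -old is the value that reached memory.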
*/ diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c index 17e4be2934..ba17480098 100644 --- a/target-m68k/cpu.c +++ b/target-m68k/cpu.c @@ -58,15 +58,20 @@ static void m68k_cpu_reset(CPUState *s) #endif m68k_switch_sp(env); /* ??? FP regs should be initialized to NaN. */ - env->cc_op = CC_OP_FLAGS; + cpu_m68k_set_ccr(env, 0); /* TODO: We should set PC from the interrupt vector. */ env->pc = 0; tlb_flush(s, 1); } -static void m68k_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) +static void m68k_cpu_disas_set_info(CPUState *s, disassemble_info *info) { + M68kCPU *cpu = M68K_CPU(s); + CPUM68KState *env = &cpu->env; info->print_insn = print_insn_m68k; + if (m68k_feature(env, M68K_FEATURE_M68000)) { + info->mach = bfd_mach_m68040; + } } /* CPU models */ @@ -98,6 +103,57 @@ static void m5206_cpu_initfn(Object *obj) m68k_set_feature(env, M68K_FEATURE_CF_ISA_A); } +static void m68000_cpu_initfn(Object *obj) +{ + M68kCPU *cpu = M68K_CPU(obj); + CPUM68KState *env = &cpu->env; + + m68k_set_feature(env, M68K_FEATURE_M68000); + m68k_set_feature(env, M68K_FEATURE_USP); + m68k_set_feature(env, M68K_FEATURE_WORD_INDEX); +} + +static void m68020_cpu_initfn(Object *obj) +{ + M68kCPU *cpu = M68K_CPU(obj); + CPUM68KState *env = &cpu->env; + + m68k_set_feature(env, M68K_FEATURE_M68000); + m68k_set_feature(env, M68K_FEATURE_USP); + m68k_set_feature(env, M68K_FEATURE_WORD_INDEX); + m68k_set_feature(env, M68K_FEATURE_QUAD_MULDIV); + m68k_set_feature(env, M68K_FEATURE_BRAL); + m68k_set_feature(env, M68K_FEATURE_BCCL); + m68k_set_feature(env, M68K_FEATURE_BITFIELD); + m68k_set_feature(env, M68K_FEATURE_EXT_FULL); + m68k_set_feature(env, M68K_FEATURE_SCALED_INDEX); + m68k_set_feature(env, M68K_FEATURE_LONG_MULDIV); + m68k_set_feature(env, M68K_FEATURE_FPU); + m68k_set_feature(env, M68K_FEATURE_CAS); + m68k_set_feature(env, M68K_FEATURE_BKPT); +} +#define m68030_cpu_initfn m68020_cpu_initfn +#define m68040_cpu_initfn m68020_cpu_initfn + +static void m68060_cpu_initfn(Object *obj) +{ + M68kCPU *cpu = M68K_CPU(obj); + CPUM68KState *env = &cpu->env; + + m68k_set_feature(env, M68K_FEATURE_M68000); + m68k_set_feature(env, M68K_FEATURE_USP); + m68k_set_feature(env, M68K_FEATURE_WORD_INDEX); + m68k_set_feature(env, M68K_FEATURE_BRAL); + m68k_set_feature(env, M68K_FEATURE_BCCL); + m68k_set_feature(env, M68K_FEATURE_BITFIELD); + m68k_set_feature(env, M68K_FEATURE_EXT_FULL); + m68k_set_feature(env, M68K_FEATURE_SCALED_INDEX); + m68k_set_feature(env, M68K_FEATURE_LONG_MULDIV); + m68k_set_feature(env, M68K_FEATURE_FPU); + m68k_set_feature(env, M68K_FEATURE_CAS); + m68k_set_feature(env, M68K_FEATURE_BKPT); +} + static void m5208_cpu_initfn(Object *obj) { M68kCPU *cpu = M68K_CPU(obj); @@ -148,6 +204,11 @@ typedef struct M68kCPUInfo { } M68kCPUInfo; static const M68kCPUInfo m68k_cpus[] = { + { .name = "m68000", .instance_init = m68000_cpu_initfn }, + { .name = "m68020", .instance_init = m68020_cpu_initfn }, + { .name = "m68030", .instance_init = m68030_cpu_initfn }, + { .name = "m68040", .instance_init = m68040_cpu_initfn }, + { .name = "m68060", .instance_init = m68060_cpu_initfn }, { .name = "m5206", .instance_init = m5206_cpu_initfn }, { .name = "m5208", .instance_init = m5208_cpu_initfn }, { .name = "cfv4e", .instance_init = cfv4e_cpu_initfn }, @@ -220,8 +281,6 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) #else cc->get_phys_page_debug = m68k_cpu_get_phys_page_debug; #endif - cc->cpu_exec_enter = m68k_cpu_exec_enter; - cc->cpu_exec_exit = m68k_cpu_exec_exit; cc->disas_set_info = 
m68k_cpu_disas_set_info; cc->gdb_num_core_regs = 18; diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h index 471f490dc1..48c5b811f3 100644 --- a/target-m68k/cpu.h +++ b/target-m68k/cpu.h @@ -30,6 +30,14 @@ #include "cpu-qom.h" #include "fpu/softfloat.h" +#define OS_BYTE 0 +#define OS_WORD 1 +#define OS_LONG 2 +#define OS_SINGLE 3 +#define OS_DOUBLE 4 +#define OS_EXTENDED 5 +#define OS_PACKED 6 + #define MAX_QREGS 32 #define EXCP_ACCESS 2 /* Access (MMU) error. */ @@ -53,6 +61,7 @@ #define EXCP_HALT_INSN 0x101 #define NB_MMU_MODES 2 +#define TARGET_INSN_START_EXTRA_WORDS 1 typedef struct CPUM68KState { uint32_t dregs[8]; @@ -66,9 +75,11 @@ typedef struct CPUM68KState { /* Condition flags. */ uint32_t cc_op; - uint32_t cc_dest; - uint32_t cc_src; - uint32_t cc_x; + uint32_t cc_x; /* always 0/1 */ + uint32_t cc_n; /* in bit 31 (i.e. negative) */ + uint32_t cc_v; /* in bit 31, unused, or computed from cc_n and cc_v */ + uint32_t cc_c; /* either 0/1, unused, or computed from cc_n and cc_v */ + uint32_t cc_z; /* == 0 or unused */ float64 fregs[8]; float64 fp_result; @@ -141,9 +152,6 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int m68k_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); -void m68k_cpu_exec_enter(CPUState *cs); -void m68k_cpu_exec_exit(CPUState *cs); - void m68k_tcg_init(void); void m68k_cpu_init_gdb(M68kCPU *cpu); M68kCPU *cpu_m68k_init(const char *cpu_model); @@ -152,7 +160,8 @@ M68kCPU *cpu_m68k_init(const char *cpu_model); is returned if the signal was handled by the virtual CPU. */ int cpu_m68k_signal_handler(int host_signum, void *pinfo, void *puc); -void cpu_m68k_flush_flags(CPUM68KState *, int); +uint32_t cpu_m68k_get_ccr(CPUM68KState *env); +void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t); /* Instead of computing the condition codes after each m68k instruction, @@ -162,18 +171,25 @@ void cpu_m68k_flush_flags(CPUM68KState *, int); * using this information. Condition codes are not generated if they * are only needed for conditional branches. */ -enum { - CC_OP_DYNAMIC, /* Use env->cc_op */ - CC_OP_FLAGS, /* CC_DEST = CVZN, CC_SRC = unused */ - CC_OP_LOGIC, /* CC_DEST = result, CC_SRC = unused */ - CC_OP_ADD, /* CC_DEST = result, CC_SRC = source */ - CC_OP_SUB, /* CC_DEST = result, CC_SRC = source */ - CC_OP_CMPB, /* CC_DEST = result, CC_SRC = source */ - CC_OP_CMPW, /* CC_DEST = result, CC_SRC = source */ - CC_OP_ADDX, /* CC_DEST = result, CC_SRC = source */ - CC_OP_SUBX, /* CC_DEST = result, CC_SRC = source */ - CC_OP_SHIFT, /* CC_DEST = result, CC_SRC = carry */ -}; +typedef enum { + /* Translator only -- use env->cc_op. */ + CC_OP_DYNAMIC = -1, + + /* Each flag bit computed into cc_[xcnvz]. */ + CC_OP_FLAGS, + + /* X in cc_x, C = X, N in cc_n, Z in cc_n, V via cc_n/cc_v. */ + CC_OP_ADD, + CC_OP_SUB, + + /* X in cc_x, {N,Z,C,V} via cc_n/cc_v. */ + CC_OP_CMP, + + /* X in cc_x, C = 0, V = 0, N in cc_n, Z in cc_n. */ + CC_OP_LOGIC, + + CC_OP_NB +} CCOp; #define CCF_C 0x01 #define CCF_V 0x02 @@ -215,6 +231,7 @@ void do_m68k_semihosting(CPUM68KState *env, int nr); ISA revisions mentioned. */ enum m68k_features { + M68K_FEATURE_M68000, M68K_FEATURE_CF_ISA_A, M68K_FEATURE_CF_ISA_B, /* (ISA B or C). */ M68K_FEATURE_CF_ISA_APLUSC, /* BIT/BITREV, FF1, STRLDSR (ISA A+ or C). */ @@ -225,7 +242,15 @@ enum m68k_features { M68K_FEATURE_CF_EMAC_B, /* Revision B EMAC (dual accumulate). */ M68K_FEATURE_USP, /* User Stack Pointer. (ISA A+, B or C). 
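A note on the condition-code layout introduced above: keeping N and V in bit 31, Z in a full word that means "set" when it holds zero, and X and C as plain 0/1 lets most flags fall out of the operation that produced them. A minimal sketch of recovering the architectural CCR bits after a logic operation under these conventions (cpu_m68k_get_ccr() in helper.c below does this for every cc_op):

    uint32_t ccr = 0;
    ccr |= (env->cc_n >> 31) ? CCF_N : 0;
    ccr |= (env->cc_z == 0) ? CCF_Z : 0;
    ccr |= env->cc_x ? CCF_X : 0;

C and V need no work at all here, since logic operations architecturally clear them.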
*/ M68K_FEATURE_EXT_FULL, /* 68020+ full extension word. */ - M68K_FEATURE_WORD_INDEX /* word sized address index registers. */ + M68K_FEATURE_WORD_INDEX, /* word sized address index registers. */ + M68K_FEATURE_SCALED_INDEX, /* scaled address index registers. */ + M68K_FEATURE_LONG_MULDIV, /* 32 bit multiply/divide. */ + M68K_FEATURE_QUAD_MULDIV, /* 64 bit multiply/divide. */ + M68K_FEATURE_BCCL, /* Long conditional branches. */ + M68K_FEATURE_BITFIELD, /* Bit field insns. */ + M68K_FEATURE_FPU, + M68K_FEATURE_CAS, + M68K_FEATURE_BKPT, }; static inline int m68k_feature(CPUM68KState *env, int feature) @@ -238,8 +263,11 @@ void m68k_cpu_list(FILE *f, fprintf_function cpu_fprintf); void register_m68k_insns (CPUM68KState *env); #ifdef CONFIG_USER_ONLY -/* Linux uses 8k pages. */ -#define TARGET_PAGE_BITS 13 +/* Coldfire Linux uses 8k pages + * and m68k linux uses 4k pages + * use the smaller one + */ +#define TARGET_PAGE_BITS 12 #else /* Smallest TLB entry size is 1k. */ #define TARGET_PAGE_BITS 10 diff --git a/target-m68k/helper.c b/target-m68k/helper.c index 89bbe6dfa6..094a7e59a9 100644 --- a/target-m68k/helper.c +++ b/target-m68k/helper.c @@ -132,87 +132,6 @@ void m68k_cpu_init_gdb(M68kCPU *cpu) /* TODO: Add [E]MAC registers. */ } -void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op) -{ - M68kCPU *cpu = m68k_env_get_cpu(env); - int flags; - uint32_t src; - uint32_t dest; - uint32_t tmp; - -#define HIGHBIT 0x80000000u - -#define SET_NZ(x) do { \ - if ((x) == 0) \ - flags |= CCF_Z; \ - else if ((int32_t)(x) < 0) \ - flags |= CCF_N; \ - } while (0) - -#define SET_FLAGS_SUB(type, utype) do { \ - SET_NZ((type)dest); \ - tmp = dest + src; \ - if ((utype) tmp < (utype) src) \ - flags |= CCF_C; \ - if ((1u << (sizeof(type) * 8 - 1)) & (tmp ^ dest) & (tmp ^ src)) \ - flags |= CCF_V; \ - } while (0) - - flags = 0; - src = env->cc_src; - dest = env->cc_dest; - switch (cc_op) { - case CC_OP_FLAGS: - flags = dest; - break; - case CC_OP_LOGIC: - SET_NZ(dest); - break; - case CC_OP_ADD: - SET_NZ(dest); - if (dest < src) - flags |= CCF_C; - tmp = dest - src; - if (HIGHBIT & (src ^ dest) & ~(tmp ^ src)) - flags |= CCF_V; - break; - case CC_OP_SUB: - SET_FLAGS_SUB(int32_t, uint32_t); - break; - case CC_OP_CMPB: - SET_FLAGS_SUB(int8_t, uint8_t); - break; - case CC_OP_CMPW: - SET_FLAGS_SUB(int16_t, uint16_t); - break; - case CC_OP_ADDX: - SET_NZ(dest); - if (dest <= src) - flags |= CCF_C; - tmp = dest - src - 1; - if (HIGHBIT & (src ^ dest) & ~(tmp ^ src)) - flags |= CCF_V; - break; - case CC_OP_SUBX: - SET_NZ(dest); - tmp = dest + src + 1; - if (tmp <= src) - flags |= CCF_C; - if (HIGHBIT & (tmp ^ dest) & (tmp ^ src)) - flags |= CCF_V; - break; - case CC_OP_SHIFT: - SET_NZ(dest); - if (src) - flags |= CCF_C; - break; - default: - cpu_abort(CPU(cpu), "Bad CC_OP %d", cc_op); - } - env->cc_op = CC_OP_FLAGS; - env->cc_dest = flags; -} - void HELPER(movec)(CPUM68KState *env, uint32_t reg, uint32_t val) { M68kCPU *cpu = m68k_env_get_cpu(env); @@ -349,140 +268,111 @@ uint32_t HELPER(ff1)(uint32_t x) return n; } -uint32_t HELPER(sats)(uint32_t val, uint32_t ccr) +uint32_t HELPER(sats)(uint32_t val, uint32_t v) { /* The result has the opposite sign to the original value. 
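Concretely, when v has bit 31 set (overflow recorded), a val of 0x80000000 yields ((int32_t)val >> 31) ^ SIGNBIT = 0xffffffff ^ 0x80000000 = 0x7fffffff, the positive saturation limit, while a non-negative val saturates to 0x80000000; with no overflow, val passes through unchanged.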
*/ - if (ccr & CCF_V) + if ((int32_t)v < 0) { val = (((int32_t)val) >> 31) ^ SIGNBIT; + } return val; } uint32_t HELPER(subx_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2) { - uint32_t res; - uint32_t old_flags; + uint32_t res, new_x; - old_flags = env->cc_dest; if (env->cc_x) { - env->cc_x = (op1 <= op2); - env->cc_op = CC_OP_SUBX; + new_x = (op1 <= op2); res = op1 - (op2 + 1); } else { - env->cc_x = (op1 < op2); - env->cc_op = CC_OP_SUB; + new_x = (op1 < op2); res = op1 - op2; } - env->cc_dest = res; - env->cc_src = op2; - cpu_m68k_flush_flags(env, env->cc_op); - /* !Z is sticky. */ - env->cc_dest &= (old_flags | ~CCF_Z); + env->cc_x = new_x; + env->cc_c = new_x; + env->cc_n = res; + env->cc_z |= res; /* !Z is sticky */ + env->cc_v = (res ^ op1) & (op1 ^ op2); + return res; } uint32_t HELPER(addx_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2) { - uint32_t res; - uint32_t old_flags; + uint32_t res, new_x; - old_flags = env->cc_dest; if (env->cc_x) { res = op1 + op2 + 1; - env->cc_x = (res <= op2); - env->cc_op = CC_OP_ADDX; + new_x = (res <= op2); } else { res = op1 + op2; - env->cc_x = (res < op2); - env->cc_op = CC_OP_ADD; + new_x = (res < op2); } - env->cc_dest = res; - env->cc_src = op2; - cpu_m68k_flush_flags(env, env->cc_op); - /* !Z is sticky. */ - env->cc_dest &= (old_flags | ~CCF_Z); - return res; -} + env->cc_x = new_x; + env->cc_c = new_x; + env->cc_n = res; + env->cc_z |= res; /* !Z is sticky. */ + env->cc_v = (res ^ op1) & ~(op1 ^ op2); -uint32_t HELPER(xflag_lt)(uint32_t a, uint32_t b) -{ - return a < b; + return res; } void HELPER(set_sr)(CPUM68KState *env, uint32_t val) { - env->sr = val & 0xffff; + env->sr = val & 0xffe0; + cpu_m68k_set_ccr(env, val); m68k_switch_sp(env); } uint32_t HELPER(shl_cc)(CPUM68KState *env, uint32_t val, uint32_t shift) { - uint32_t result; - uint32_t cf; + uint64_t result; shift &= 63; - if (shift == 0) { - result = val; - cf = env->cc_src & CCF_C; - } else if (shift < 32) { - result = val << shift; - cf = (val >> (32 - shift)) & 1; - } else if (shift == 32) { - result = 0; - cf = val & 1; - } else /* shift > 32 */ { - result = 0; - cf = 0; - } - env->cc_src = cf; - env->cc_x = (cf != 0); - env->cc_dest = result; + result = (uint64_t)val << shift; + + env->cc_c = (result >> 32) & 1; + env->cc_n = result; + env->cc_z = result; + env->cc_v = 0; + env->cc_x = shift ? env->cc_c : env->cc_x; + return result; } uint32_t HELPER(shr_cc)(CPUM68KState *env, uint32_t val, uint32_t shift) { + uint64_t temp; uint32_t result; - uint32_t cf; shift &= 63; - if (shift == 0) { - result = val; - cf = env->cc_src & CCF_C; - } else if (shift < 32) { - result = val >> shift; - cf = (val >> (shift - 1)) & 1; - } else if (shift == 32) { - result = 0; - cf = val >> 31; - } else /* shift > 32 */ { - result = 0; - cf = 0; - } - env->cc_src = cf; - env->cc_x = (cf != 0); - env->cc_dest = result; + temp = (uint64_t)val << 32 >> shift; + result = temp >> 32; + + env->cc_c = (temp >> 31) & 1; + env->cc_n = result; + env->cc_z = result; + env->cc_v = 0; + env->cc_x = shift ? 
env->cc_c : env->cc_x; + return result; } uint32_t HELPER(sar_cc)(CPUM68KState *env, uint32_t val, uint32_t shift) { + uint64_t temp; uint32_t result; - uint32_t cf; shift &= 63; - if (shift == 0) { - result = val; - cf = (env->cc_src & CCF_C) != 0; - } else if (shift < 32) { - result = (int32_t)val >> shift; - cf = (val >> (shift - 1)) & 1; - } else /* shift >= 32 */ { - result = (int32_t)val >> 31; - cf = val >> 31; - } - env->cc_src = cf; - env->cc_x = cf; - env->cc_dest = result; + temp = (int64_t)val << 32 >> shift; + result = temp >> 32; + + env->cc_c = (temp >> 31) & 1; + env->cc_n = result; + env->cc_z = result; + env->cc_v = result ^ val; + env->cc_x = shift ? env->cc_c : env->cc_x; + return result; } @@ -734,9 +624,92 @@ void HELPER(mac_set_flags)(CPUM68KState *env, uint32_t acc) } } + +#define COMPUTE_CCR(op, x, n, z, v, c) { \ + switch (op) { \ + case CC_OP_FLAGS: \ + /* Everything in place. */ \ + break; \ + case CC_OP_ADD: \ + res = n; \ + src2 = v; \ + src1 = res - src2; \ + c = x; \ + z = n; \ + v = (res ^ src1) & ~(src1 ^ src2); \ + break; \ + case CC_OP_SUB: \ + res = n; \ + src2 = v; \ + src1 = res + src2; \ + c = x; \ + z = n; \ + v = (res ^ src1) & (src1 ^ src2); \ + break; \ + case CC_OP_CMP: \ + src1 = n; \ + src2 = v; \ + res = src1 - src2; \ + n = res; \ + z = res; \ + c = src1 < src2; \ + v = (res ^ src1) & (src1 ^ src2); \ + break; \ + case CC_OP_LOGIC: \ + c = v = 0; \ + z = n; \ + break; \ + default: \ + cpu_abort(CPU(m68k_env_get_cpu(env)), "Bad CC_OP %d", op); \ + } \ +} while (0) + +uint32_t cpu_m68k_get_ccr(CPUM68KState *env) +{ + uint32_t x, c, n, z, v; + uint32_t res, src1, src2; + + x = env->cc_x; + c = env->cc_c; + n = env->cc_n; + z = env->cc_z; + v = env->cc_v; + + COMPUTE_CCR(env->cc_op, x, n, z, v, c); + + n = n >> 31; + v = v >> 31; + z = (z == 0); + + return x * CCF_X + n * CCF_N + z * CCF_Z + v * CCF_V + c * CCF_C; +} + +uint32_t HELPER(get_ccr)(CPUM68KState *env) +{ + return cpu_m68k_get_ccr(env); +} + +void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t ccr) +{ + env->cc_x = (ccr & CCF_X ? 1 : 0); + env->cc_n = (ccr & CCF_N ? -1 : 0); + env->cc_z = (ccr & CCF_Z ? 0 : 1); + env->cc_v = (ccr & CCF_V ? -1 : 0); + env->cc_c = (ccr & CCF_C ? 
1 : 0); + env->cc_op = CC_OP_FLAGS; +} + +void HELPER(set_ccr)(CPUM68KState *env, uint32_t ccr) +{ + cpu_m68k_set_ccr(env, ccr); +} + void HELPER(flush_flags)(CPUM68KState *env, uint32_t cc_op) { - cpu_m68k_flush_flags(env, cc_op); + uint32_t res, src1, src2; + + COMPUTE_CCR(cc_op, env->cc_x, env->cc_n, env->cc_z, env->cc_v, env->cc_c); + env->cc_op = CC_OP_FLAGS; } uint32_t HELPER(get_macf)(CPUM68KState *env, uint64_t val) @@ -866,23 +839,3 @@ void HELPER(set_mac_extu)(CPUM68KState *env, uint32_t val, uint32_t acc) res |= (uint64_t)(val & 0xffff0000) << 16; env->macc[acc + 1] = res; } - -void m68k_cpu_exec_enter(CPUState *cs) -{ - M68kCPU *cpu = M68K_CPU(cs); - CPUM68KState *env = &cpu->env; - - env->cc_op = CC_OP_FLAGS; - env->cc_dest = env->sr & 0xf; - env->cc_x = (env->sr >> 4) & 1; -} - -void m68k_cpu_exec_exit(CPUState *cs) -{ - M68kCPU *cpu = M68K_CPU(cs); - CPUM68KState *env = &cpu->env; - - cpu_m68k_flush_flags(env, env->cc_op); - env->cc_op = CC_OP_FLAGS; - env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4); -} diff --git a/target-m68k/helper.h b/target-m68k/helper.h index f4e5fdf021..c8681480f0 100644 --- a/target-m68k/helper.h +++ b/target-m68k/helper.h @@ -1,6 +1,6 @@ DEF_HELPER_1(bitrev, i32, i32) DEF_HELPER_1(ff1, i32, i32) -DEF_HELPER_2(sats, i32, i32, i32) +DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_2(divu, void, env, i32) DEF_HELPER_2(divs, void, env, i32) DEF_HELPER_3(addx_cc, i32, env, i32, i32) @@ -8,7 +8,6 @@ DEF_HELPER_3(subx_cc, i32, env, i32, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) DEF_HELPER_3(shr_cc, i32, env, i32, i32) DEF_HELPER_3(sar_cc, i32, env, i32, i32) -DEF_HELPER_2(xflag_lt, i32, i32, i32) DEF_HELPER_2(set_sr, void, env, i32) DEF_HELPER_3(movec, void, env, i32, i32) @@ -47,4 +46,6 @@ DEF_HELPER_3(set_mac_exts, void, env, i32, i32) DEF_HELPER_3(set_mac_extu, void, env, i32, i32) DEF_HELPER_2(flush_flags, void, env, i32) +DEF_HELPER_2(set_ccr, void, env, i32) +DEF_HELPER_FLAGS_1(get_ccr, TCG_CALL_NO_WG_SE, i32, env) DEF_HELPER_2(raise_exception, void, env, i32) diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c index e41ae46498..48e02e4062 100644 --- a/target-m68k/op_helper.c +++ b/target-m68k/op_helper.c @@ -63,9 +63,9 @@ static void do_rte(CPUM68KState *env) fmt = cpu_ldl_kernel(env, sp); env->pc = cpu_ldl_kernel(env, sp + 4); sp |= (fmt >> 28) & 3; - env->sr = fmt & 0xffff; env->aregs[7] = sp + 8; - m68k_switch_sp(env); + + helper_set_sr(env, fmt); } static void do_interrupt_all(CPUM68KState *env, int is_hw) @@ -112,6 +112,7 @@ static void do_interrupt_all(CPUM68KState *env, int is_hw) fmt |= 0x40000000; fmt |= vector << 16; fmt |= env->sr; + fmt |= cpu_m68k_get_ccr(env); env->sr |= SR_S; if (is_hw) { @@ -184,7 +185,6 @@ void HELPER(divu)(CPUM68KState *env, uint32_t word) uint32_t den; uint32_t quot; uint32_t rem; - uint32_t flags; num = env->div1; den = env->div2; @@ -194,16 +194,14 @@ void HELPER(divu)(CPUM68KState *env, uint32_t word) } quot = num / den; rem = num % den; - flags = 0; - if (word && quot > 0xffff) - flags |= CCF_V; - if (quot == 0) - flags |= CCF_Z; - else if ((int32_t)quot < 0) - flags |= CCF_N; + + env->cc_v = (word && quot > 0xffff ? 
-1 : 0); + env->cc_z = quot; + env->cc_n = quot; + env->cc_c = 0; + env->div1 = quot; env->div2 = rem; - env->cc_dest = flags; } void HELPER(divs)(CPUM68KState *env, uint32_t word) @@ -212,7 +210,6 @@ void HELPER(divs)(CPUM68KState *env, uint32_t word) int32_t den; int32_t quot; int32_t rem; - int32_t flags; num = env->div1; den = env->div2; @@ -221,14 +218,12 @@ void HELPER(divs)(CPUM68KState *env, uint32_t word) } quot = num / den; rem = num % den; - flags = 0; - if (word && quot != (int16_t)quot) - flags |= CCF_V; - if (quot == 0) - flags |= CCF_Z; - else if (quot < 0) - flags |= CCF_N; + + env->cc_v = (word && quot != (int16_t)quot ? -1 : 0); + env->cc_z = quot; + env->cc_n = quot; + env->cc_c = 0; + env->div1 = quot; env->div2 = rem; - env->cc_dest = flags; } diff --git a/target-m68k/qregs.def b/target-m68k/qregs.def index 204663e1aa..156c0f558f 100644 --- a/target-m68k/qregs.def +++ b/target-m68k/qregs.def @@ -2,9 +2,11 @@ DEFF64(FP_RESULT, fp_result) DEFO32(PC, pc) DEFO32(SR, sr) DEFO32(CC_OP, cc_op) -DEFO32(CC_DEST, cc_dest) -DEFO32(CC_SRC, cc_src) DEFO32(CC_X, cc_x) +DEFO32(CC_C, cc_c) +DEFO32(CC_N, cc_n) +DEFO32(CC_V, cc_v) +DEFO32(CC_Z, cc_z) DEFO32(DIV1, div1) DEFO32(DIV2, div2) DEFO32(MACSR, macsr) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index ecd5e5c8fd..6c6173a09e 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -59,9 +59,10 @@ static TCGv cpu_aregs[8]; static TCGv_i64 cpu_fregs[8]; static TCGv_i64 cpu_macc[4]; -#define DREG(insn, pos) cpu_dregs[((insn) >> (pos)) & 7] -#define AREG(insn, pos) cpu_aregs[((insn) >> (pos)) & 7] -#define FREG(insn, pos) cpu_fregs[((insn) >> (pos)) & 7] +#define REG(insn, pos) (((insn) >> (pos)) & 7) +#define DREG(insn, pos) cpu_dregs[REG(insn, pos)] +#define AREG(insn, pos) cpu_aregs[REG(insn, pos)] +#define FREG(insn, pos) cpu_fregs[REG(insn, pos)] #define MACREG(acc) cpu_macc[acc] #define QREG_SP cpu_aregs[7] @@ -132,7 +133,8 @@ typedef struct DisasContext { target_ulong insn_pc; /* Start of the current instruction. */ target_ulong pc; int is_jmp; - int cc_op; + CCOp cc_op; /* Current CC operation */ + int cc_op_synced; int user; uint32_t fpcr; struct TranslationBlock *tb; @@ -154,12 +156,6 @@ typedef struct DisasContext { static void *gen_throws_exception; #define gen_last_qop NULL -#define OS_BYTE 0 -#define OS_WORD 1 -#define OS_LONG 2 -#define OS_SINGLE 4 -#define OS_DOUBLE 5 - typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn); #ifdef DEBUG_DISPATCH @@ -170,7 +166,7 @@ typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn); uint16_t insn) \ { \ qemu_log("Dispatch " #name "\n"); \ - real_disas_##name(s, env, insn); \ + real_disas_##name(env, s, insn); \ } \ static void real_disas_##name(CPUM68KState *env, DisasContext *s, \ uint16_t insn) @@ -180,6 +176,48 @@ typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn); uint16_t insn) #endif +static const uint8_t cc_op_live[CC_OP_NB] = { + [CC_OP_FLAGS] = CCF_C | CCF_V | CCF_Z | CCF_N | CCF_X, + [CC_OP_ADD] = CCF_X | CCF_N | CCF_V, + [CC_OP_SUB] = CCF_X | CCF_N | CCF_V, + [CC_OP_CMP] = CCF_X | CCF_N | CCF_V, + [CC_OP_LOGIC] = CCF_X | CCF_N +}; + +static void set_cc_op(DisasContext *s, CCOp op) +{ + CCOp old_op = s->cc_op; + int dead; + + if (old_op == op) { + return; + } + s->cc_op = op; + s->cc_op_synced = 0; + + /* Discard CC computation that will no longer be used. + Note that X and N are never dead. 
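As a worked example, switching from CC_OP_ADD, whose live set is CCF_X | CCF_N | CCF_V, to CC_OP_LOGIC, whose live set is CCF_X | CCF_N, gives dead == CCF_V, so only QREG_CC_V is discarded; Z was never live for CC_OP_ADD in the first place (it is folded into N), so there is nothing to drop for it.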
*/ + dead = cc_op_live[old_op] & ~cc_op_live[op]; + if (dead & CCF_C) { + tcg_gen_discard_i32(QREG_CC_C); + } + if (dead & CCF_Z) { + tcg_gen_discard_i32(QREG_CC_Z); + } + if (dead & CCF_V) { + tcg_gen_discard_i32(QREG_CC_V); + } +} + +/* Update the CPU env CC_OP state. */ +static void update_cc_op(DisasContext *s) +{ + if (!s->cc_op_synced) { + s->cc_op_synced = 1; + tcg_gen_movi_i32(QREG_CC_OP, s->cc_op); + } +} + /* Generate a load from the specified address. Narrow values are sign extended to full register width. */ static inline TCGv gen_load(DisasContext * s, int opsize, TCGv addr, int sign) @@ -268,14 +306,27 @@ static TCGv gen_ldst(DisasContext *s, int opsize, TCGv addr, TCGv val, } } +/* Read a 16-bit immediate constant */ +static inline uint16_t read_im16(CPUM68KState *env, DisasContext *s) +{ + uint16_t im; + im = cpu_lduw_code(env, s->pc); + s->pc += 2; + return im; +} + +/* Read an 8-bit immediate constant */ +static inline uint8_t read_im8(CPUM68KState *env, DisasContext *s) +{ + return read_im16(env, s); +} + /* Read a 32-bit immediate constant. */ static inline uint32_t read_im32(CPUM68KState *env, DisasContext *s) { uint32_t im; - im = ((uint32_t)cpu_lduw_code(env, s->pc)) << 16; - s->pc += 2; - im |= cpu_lduw_code(env, s->pc); - s->pc += 2; + im = read_im16(env, s) << 16; + im |= 0xffff & read_im16(env, s); return im; } @@ -309,12 +360,16 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) uint32_t bd, od; offset = s->pc; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if ((ext & 0x800) == 0 && !m68k_feature(s->env, M68K_FEATURE_WORD_INDEX)) return NULL_QREG; + if (m68k_feature(s->env, M68K_FEATURE_M68000) && + !m68k_feature(s->env, M68K_FEATURE_SCALED_INDEX)) { + ext &= ~(3 << 9); + } + if (ext & 0x100) { /* full extension word format */ if (!m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) @@ -323,8 +378,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) if ((ext & 0x30) > 0x10) { /* base displacement */ if ((ext & 0x30) == 0x20) { - bd = (int16_t)cpu_lduw_code(env, s->pc); - s->pc += 2; + bd = (int16_t)read_im16(env, s); } else { bd = read_im32(env, s); } @@ -372,8 +426,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) if ((ext & 3) > 1) { /* outer displacement */ if ((ext & 3) == 2) { - od = (int16_t)cpu_lduw_code(env, s->pc); - s->pc += 2; + od = (int16_t)read_im16(env, s); } else { od = read_im32(env, s); } @@ -401,33 +454,129 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) return add; } -/* Update the CPU env CC_OP state. */ -static inline void gen_flush_cc_op(DisasContext *s) -{ - if (s->cc_op != CC_OP_DYNAMIC) - tcg_gen_movi_i32(QREG_CC_OP, s->cc_op); -} - /* Evaluate all the CC flags. */ -static inline void gen_flush_flags(DisasContext *s) + +static void gen_flush_flags(DisasContext *s) { - if (s->cc_op == CC_OP_FLAGS) + TCGv t0, t1; + + switch (s->cc_op) { + case CC_OP_FLAGS: return; - gen_flush_cc_op(s); - gen_helper_flush_flags(cpu_env, QREG_CC_OP); + + case CC_OP_ADD: + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + /* Compute signed overflow for addition. 
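At this point CC_N holds the result and CC_V still holds the second operand, so the first operand is recovered as t0 = CC_N - CC_V, and the overflow condition computed below is (res ^ src2) & ~(src1 ^ src2): set when the operands shared a sign that the result does not. A quick check with 0x7fffffff + 1: res = 0x80000000, src2 = 1, src1 = 0x7fffffff, giving (res ^ src2) = 0x80000001 and ~(src1 ^ src2) = 0x80000001, so bit 31 of V comes out set, as it should.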
*/ + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + tcg_gen_sub_i32(t0, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0); + tcg_temp_free(t0); + tcg_gen_andc_i32(QREG_CC_V, t1, QREG_CC_V); + tcg_temp_free(t1); + break; + + case CC_OP_SUB: + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + /* Compute signed overflow for subtraction. */ + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + tcg_gen_add_i32(t0, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0); + tcg_temp_free(t0); + tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t1); + tcg_temp_free(t1); + break; + + case CC_OP_CMP: + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_C, QREG_CC_N, QREG_CC_V); + tcg_gen_sub_i32(QREG_CC_Z, QREG_CC_N, QREG_CC_V); + /* Compute signed overflow for subtraction. */ + t0 = tcg_temp_new(); + tcg_gen_xor_i32(t0, QREG_CC_Z, QREG_CC_N); + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, QREG_CC_N); + tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t0); + tcg_temp_free(t0); + tcg_gen_mov_i32(QREG_CC_N, QREG_CC_Z); + break; + + case CC_OP_LOGIC: + tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + tcg_gen_movi_i32(QREG_CC_C, 0); + tcg_gen_movi_i32(QREG_CC_V, 0); + break; + + case CC_OP_DYNAMIC: + gen_helper_flush_flags(cpu_env, QREG_CC_OP); + break; + + default: + t0 = tcg_const_i32(s->cc_op); + gen_helper_flush_flags(cpu_env, t0); + tcg_temp_free(t0); + break; + } + + /* Note that flush_flags also assigned to env->cc_op. */ s->cc_op = CC_OP_FLAGS; + s->cc_op_synced = 1; +} + +/* Sign or zero extend a value. */ + +static inline void gen_ext(TCGv res, TCGv val, int opsize, int sign) +{ + switch (opsize) { + case OS_BYTE: + if (sign) { + tcg_gen_ext8s_i32(res, val); + } else { + tcg_gen_ext8u_i32(res, val); + } + break; + case OS_WORD: + if (sign) { + tcg_gen_ext16s_i32(res, val); + } else { + tcg_gen_ext16u_i32(res, val); + } + break; + case OS_LONG: + tcg_gen_mov_i32(res, val); + break; + default: + g_assert_not_reached(); + } +} + +static TCGv gen_extend(TCGv val, int opsize, int sign) +{ + TCGv tmp; + + if (opsize == OS_LONG) { + tmp = val; + } else { + tmp = tcg_temp_new(); + gen_ext(tmp, val, opsize, sign); + } + + return tmp; } -static void gen_logic_cc(DisasContext *s, TCGv val) +static void gen_logic_cc(DisasContext *s, TCGv val, int opsize) { - tcg_gen_mov_i32(QREG_CC_DEST, val); - s->cc_op = CC_OP_LOGIC; + gen_ext(QREG_CC_N, val, opsize, 1); + set_cc_op(s, CC_OP_LOGIC); } static void gen_update_cc_add(TCGv dest, TCGv src) { - tcg_gen_mov_i32(QREG_CC_DEST, dest); - tcg_gen_mov_i32(QREG_CC_SRC, src); + tcg_gen_mov_i32(QREG_CC_N, dest); + tcg_gen_mov_i32(QREG_CC_V, src); } static inline int opsize_bytes(int opsize) @@ -438,6 +587,19 @@ static inline int opsize_bytes(int opsize) case OS_LONG: return 4; case OS_SINGLE: return 4; case OS_DOUBLE: return 8; + case OS_EXTENDED: return 12; + case OS_PACKED: return 12; + default: + g_assert_not_reached(); + } +} + +static inline int insn_opsize(int insn) +{ + switch ((insn >> 6) & 3) { + case 0: return OS_BYTE; + case 1: return OS_WORD; + case 2: return OS_LONG; default: g_assert_not_reached(); } @@ -470,36 +632,6 @@ static void gen_partset_reg(int opsize, TCGv reg, TCGv val) } } -/* Sign or zero extend a value. 
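The replacement pair above splits this job in two: gen_ext() extends into a caller-supplied destination, and the slimmer gen_extend() hands back val itself for OS_LONG rather than copying it. One behavioural difference worth noting: the old helper below silently passed OS_SINGLE through, whereas gen_ext() now reaches g_assert_not_reached() for anything but byte, word and long, so float-sized operands must be dealt with before calling it.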
*/ -static inline TCGv gen_extend(TCGv val, int opsize, int sign) -{ - TCGv tmp; - - switch (opsize) { - case OS_BYTE: - tmp = tcg_temp_new(); - if (sign) - tcg_gen_ext8s_i32(tmp, val); - else - tcg_gen_ext8u_i32(tmp, val); - break; - case OS_WORD: - tmp = tcg_temp_new(); - if (sign) - tcg_gen_ext16s_i32(tmp, val); - else - tcg_gen_ext16u_i32(tmp, val); - break; - case OS_LONG: - case OS_SINGLE: - tmp = val; - break; - default: - g_assert_not_reached(); - } - return tmp; -} - /* Generate code for an "effective address". Does not adjust the base register for autoincrement addressing modes. */ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, @@ -525,8 +657,7 @@ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, case 5: /* Indirect displacement. */ reg = AREG(insn, 0); tmp = tcg_temp_new(); - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); tcg_gen_addi_i32(tmp, reg, (int16_t)ext); return tmp; case 6: /* Indirect index + displacement. */ @@ -535,16 +666,14 @@ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, case 7: /* Other */ switch (insn & 7) { case 0: /* Absolute short. */ - offset = cpu_ldsw_code(env, s->pc); - s->pc += 2; + offset = (int16_t)read_im16(env, s); return tcg_const_i32(offset); case 1: /* Absolute long. */ offset = read_im32(env, s); return tcg_const_i32(offset); case 2: /* pc displacement */ offset = s->pc; - offset += cpu_ldsw_code(env, s->pc); - s->pc += 2; + offset += (int16_t)read_im16(env, s); return tcg_const_i32(offset); case 3: /* pc index+displacement. */ return gen_lea_indexed(env, s, NULL_QREG); @@ -651,19 +780,17 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, switch (opsize) { case OS_BYTE: if (what == EA_LOADS) { - offset = cpu_ldsb_code(env, s->pc + 1); + offset = (int8_t)read_im8(env, s); } else { - offset = cpu_ldub_code(env, s->pc + 1); + offset = read_im8(env, s); } - s->pc += 2; break; case OS_WORD: if (what == EA_LOADS) { - offset = cpu_ldsw_code(env, s->pc); + offset = (int16_t)read_im16(env, s); } else { - offset = cpu_lduw_code(env, s->pc); + offset = read_im16(env, s); } - s->pc += 2; break; case OS_LONG: offset = read_im32(env, s); @@ -680,131 +807,230 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, return NULL_QREG; } -/* This generates a conditional branch, clobbering all temporaries. */ -static void gen_jmpcc(DisasContext *s, int cond, TCGLabel *l1) -{ - TCGv tmp; +typedef struct { + TCGCond tcond; + bool g1; + bool g2; + TCGv v1; + TCGv v2; +} DisasCompare; + +static void gen_cc_cond(DisasCompare *c, DisasContext *s, int cond) +{ + TCGv tmp, tmp2; + TCGCond tcond; + CCOp op = s->cc_op; + + /* The CC_OP_CMP form can handle most normal comparisons directly. 
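After a compare, CC_N holds the first operand and CC_V the second, so most conditions reduce to a single TCG comparison on that pair: CS (cond 5) is TCG_COND_LTU, EQ (cond 7) is TCG_COND_EQ, LE (cond 15) is the signed TCG_COND_LE, and so on; the even-numbered opposite of each pair is obtained at the "done" label by tcg_invert_cond() on the same operands, with no flag materialization needed.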
*/ + if (op == CC_OP_CMP) { + c->g1 = c->g2 = 1; + c->v1 = QREG_CC_N; + c->v2 = QREG_CC_V; + switch (cond) { + case 2: /* HI */ + case 3: /* LS */ + tcond = TCG_COND_LEU; + goto done; + case 4: /* CC */ + case 5: /* CS */ + tcond = TCG_COND_LTU; + goto done; + case 6: /* NE */ + case 7: /* EQ */ + tcond = TCG_COND_EQ; + goto done; + case 10: /* PL */ + case 11: /* MI */ + c->g1 = c->g2 = 0; + c->v2 = tcg_const_i32(0); + c->v1 = tmp = tcg_temp_new(); + tcg_gen_sub_i32(tmp, QREG_CC_N, QREG_CC_V); + /* fallthru */ + case 12: /* GE */ + case 13: /* LT */ + tcond = TCG_COND_LT; + goto done; + case 14: /* GT */ + case 15: /* LE */ + tcond = TCG_COND_LE; + goto done; + } + } + + c->g1 = 1; + c->g2 = 0; + c->v2 = tcg_const_i32(0); - /* TODO: Optimize compare/branch pairs rather than always flushing - flag state to CC_OP_FLAGS. */ - gen_flush_flags(s); switch (cond) { case 0: /* T */ - tcg_gen_br(l1); - break; case 1: /* F */ + c->v1 = c->v2; + tcond = TCG_COND_NEVER; + goto done; + case 14: /* GT (!(Z || (N ^ V))) */ + case 15: /* LE (Z || (N ^ V)) */ + /* Logic operations clear V, which simplifies LE to (Z || N), + and since Z and N are co-located, this becomes a normal + comparison vs N. */ + if (op == CC_OP_LOGIC) { + c->v1 = QREG_CC_N; + tcond = TCG_COND_LE; + goto done; + } break; - case 2: /* HI (!C && !Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); + case 12: /* GE (!(N ^ V)) */ + case 13: /* LT (N ^ V) */ + /* Logic operations clear V, which simplifies this to N. */ + if (op != CC_OP_LOGIC) { + break; + } + /* fallthru */ + case 10: /* PL (!N) */ + case 11: /* MI (N) */ + /* Several cases represent N normally. */ + if (op == CC_OP_ADD || op == CC_OP_SUB || op == CC_OP_LOGIC) { + c->v1 = QREG_CC_N; + tcond = TCG_COND_LT; + goto done; + } break; - case 3: /* LS (C || Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + case 6: /* NE (!Z) */ + case 7: /* EQ (Z) */ + /* Some cases fold Z into N. */ + if (op == CC_OP_ADD || op == CC_OP_SUB || op == CC_OP_LOGIC) { + tcond = TCG_COND_EQ; + c->v1 = QREG_CC_N; + goto done; + } break; case 4: /* CC (!C) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); + case 5: /* CS (C) */ + /* Some cases fold C into X. */ + if (op == CC_OP_ADD || op == CC_OP_SUB) { + tcond = TCG_COND_NE; + c->v1 = QREG_CC_X; + goto done; + } + /* fallthru */ + case 8: /* VC (!V) */ + case 9: /* VS (V) */ + /* Logic operations clear V and C. */ + if (op == CC_OP_LOGIC) { + tcond = TCG_COND_NEVER; + c->v1 = c->v2; + goto done; + } + break; + } + + /* Otherwise, flush flag state to CC_OP_FLAGS. */ + gen_flush_flags(s); + + switch (cond) { + case 0: /* T */ + case 1: /* F */ + default: + /* Invalid, or handled above. 
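Once the flags have been flushed to canonical CC_OP_FLAGS form, the remaining cases below read the stored values directly, and the bit-31 conventions still pay off: PL/MI and VC/VS need no masking, because N and V keep their meaning in the sign bit and each test compiles to a signed less-than-zero comparison (TCG_COND_LT against the constant 0 already held in c->v2).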
*/ + abort(); + case 2: /* HI (!C && !Z) -> !(C || Z)*/ + case 3: /* LS (C || Z) */ + c->v1 = tmp = tcg_temp_new(); + c->g1 = 0; + tcg_gen_setcond_i32(TCG_COND_EQ, tmp, QREG_CC_Z, c->v2); + tcg_gen_or_i32(tmp, tmp, QREG_CC_C); + tcond = TCG_COND_NE; break; + case 4: /* CC (!C) */ case 5: /* CS (C) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_C; + tcond = TCG_COND_NE; break; case 6: /* NE (!Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 7: /* EQ (Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_Z; + tcond = TCG_COND_EQ; break; case 8: /* VC (!V) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 9: /* VS (V) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_V; + tcond = TCG_COND_LT; break; case 10: /* PL (!N) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 11: /* MI (N) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_N; + tcond = TCG_COND_LT; break; case 12: /* GE (!(N ^ V)) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_shri_i32(tmp, QREG_CC_DEST, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 13: /* LT (N ^ V) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_shri_i32(tmp, QREG_CC_DEST, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = tmp = tcg_temp_new(); + c->g1 = 0; + tcg_gen_xor_i32(tmp, QREG_CC_N, QREG_CC_V); + tcond = TCG_COND_LT; break; case 14: /* GT (!(Z || (N ^ V))) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_shri_i32(tmp, tmp, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 15: /* LE (Z || (N ^ V)) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_shri_i32(tmp, tmp, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = tmp = tcg_temp_new(); + c->g1 = 0; + tcg_gen_setcond_i32(TCG_COND_EQ, tmp, QREG_CC_Z, c->v2); + tcg_gen_neg_i32(tmp, tmp); + tmp2 = tcg_temp_new(); + tcg_gen_xor_i32(tmp2, QREG_CC_N, QREG_CC_V); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free(tmp2); + tcond = TCG_COND_LT; break; - default: - /* Should ever happen. 
*/ - abort(); + } + + done: + if ((cond & 1) == 0) { + tcond = tcg_invert_cond(tcond); + } + c->tcond = tcond; +} + +static void free_cond(DisasCompare *c) +{ + if (!c->g1) { + tcg_temp_free(c->v1); + } + if (!c->g2) { + tcg_temp_free(c->v2); } } +static void gen_jmpcc(DisasContext *s, int cond, TCGLabel *l1) +{ + DisasCompare c; + + gen_cc_cond(&c, s, cond); + update_cc_op(s); + tcg_gen_brcond_i32(c.tcond, c.v1, c.v2, l1); + free_cond(&c); +} + DISAS_INSN(scc) { - TCGLabel *l1; + DisasCompare c; int cond; - TCGv reg; + TCGv reg, tmp; - l1 = gen_new_label(); cond = (insn >> 8) & 0xf; + gen_cc_cond(&c, s, cond); + + tmp = tcg_temp_new(); + tcg_gen_setcond_i32(c.tcond, tmp, c.v1, c.v2); + free_cond(&c); + reg = DREG(insn, 0); - tcg_gen_andi_i32(reg, reg, 0xffffff00); - /* This is safe because we modify the reg directly, with no other values - live. */ - gen_jmpcc(s, cond ^ 1, l1); - tcg_gen_ori_i32(reg, reg, 0xff); - gen_set_label(l1); + tcg_gen_neg_i32(tmp, tmp); + tcg_gen_deposit_i32(reg, reg, tmp, 0, 8); + tcg_temp_free(tmp); } /* Force a TB lookup after an instruction that changes the CPU state. */ static void gen_lookup_tb(DisasContext *s) { - gen_flush_cc_op(s); + update_cc_op(s); tcg_gen_movi_i32(QREG_PC, s->pc); s->is_jmp = DISAS_UPDATE; } @@ -812,7 +1038,7 @@ static void gen_lookup_tb(DisasContext *s) /* Generate a jump to an immediate address. */ static void gen_jmp_im(DisasContext *s, uint32_t dest) { - gen_flush_cc_op(s); + update_cc_op(s); tcg_gen_movi_i32(QREG_PC, dest); s->is_jmp = DISAS_JUMP; } @@ -820,14 +1046,14 @@ static void gen_jmp_im(DisasContext *s, uint32_t dest) /* Generate a jump to the address in qreg DEST. */ static void gen_jmp(DisasContext *s, TCGv dest) { - gen_flush_cc_op(s); + update_cc_op(s); tcg_gen_mov_i32(QREG_PC, dest); s->is_jmp = DISAS_JUMP; } static void gen_exception(DisasContext *s, uint32_t where, int nr) { - gen_flush_cc_op(s); + update_cc_op(s); gen_jmp_im(s, where); gen_helper_raise_exception(cpu_env, tcg_const_i32(nr)); } @@ -915,8 +1141,7 @@ DISAS_INSN(mulw) SRC_EA(env, src, OS_WORD, sign, NULL); tcg_gen_mul_i32(tmp, tmp, src); tcg_gen_mov_i32(reg, tmp); - /* Unlike m68k, coldfire always clears the overflow bit. */ - gen_logic_cc(s, tmp); + gen_logic_cc(s, tmp, OS_WORD); } DISAS_INSN(divw) @@ -946,7 +1171,8 @@ DISAS_INSN(divw) tcg_gen_ext16u_i32(tmp, QREG_DIV1); tcg_gen_shli_i32(src, QREG_DIV2, 16); tcg_gen_or_i32(reg, tmp, src); - s->cc_op = CC_OP_FLAGS; + + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(divl) @@ -956,8 +1182,7 @@ DISAS_INSN(divl) TCGv reg; uint16_t ext; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext & 0x87f8) { gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED); return; @@ -979,7 +1204,7 @@ DISAS_INSN(divl) /* rem */ tcg_gen_mov_i32 (reg, QREG_DIV2); } - s->cc_op = CC_OP_FLAGS; + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(addsub) @@ -1003,12 +1228,12 @@ DISAS_INSN(addsub) } if (add) { tcg_gen_add_i32(dest, tmp, src); - gen_helper_xflag_lt(QREG_CC_X, dest, src); - s->cc_op = CC_OP_ADD; + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, src); + set_cc_op(s, CC_OP_ADD); } else { - gen_helper_xflag_lt(QREG_CC_X, tmp, src); + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, tmp, src); tcg_gen_sub_i32(dest, tmp, src); - s->cc_op = CC_OP_SUB; + set_cc_op(s, CC_OP_SUB); } gen_update_cc_add(dest, src); if (insn & 0x100) { @@ -1042,42 +1267,43 @@ DISAS_INSN(bitop_reg) else opsize = OS_LONG; op = (insn >> 6) & 3; + + gen_flush_flags(s); + SRC_EA(env, src1, opsize, 0, op ? 
&addr: NULL); src2 = DREG(insn, 9); dest = tcg_temp_new(); - gen_flush_flags(s); tmp = tcg_temp_new(); if (opsize == OS_BYTE) tcg_gen_andi_i32(tmp, src2, 7); else tcg_gen_andi_i32(tmp, src2, 31); - src2 = tmp; - tmp = tcg_temp_new(); - tcg_gen_shr_i32(tmp, src1, src2); - tcg_gen_andi_i32(tmp, tmp, 1); - tcg_gen_shli_i32(tmp, tmp, 2); - /* Clear CCF_Z if bit set. */ - tcg_gen_ori_i32(QREG_CC_DEST, QREG_CC_DEST, CCF_Z); - tcg_gen_xor_i32(QREG_CC_DEST, QREG_CC_DEST, tmp); - - tcg_gen_shl_i32(tmp, tcg_const_i32(1), src2); + + src2 = tcg_const_i32(1); + tcg_gen_shl_i32(src2, src2, tmp); + tcg_temp_free(tmp); + + tcg_gen_and_i32(QREG_CC_Z, src1, src2); + switch (op) { case 1: /* bchg */ - tcg_gen_xor_i32(dest, src1, tmp); + tcg_gen_xor_i32(dest, src1, src2); break; case 2: /* bclr */ - tcg_gen_not_i32(tmp, tmp); - tcg_gen_and_i32(dest, src1, tmp); + tcg_gen_andc_i32(dest, src1, src2); break; case 3: /* bset */ - tcg_gen_or_i32(dest, src1, tmp); + tcg_gen_or_i32(dest, src1, src2); break; default: /* btst */ break; } - if (op) + tcg_temp_free(src2); + if (op) { DEST_EA(env, insn, opsize, dest, &addr); + } + tcg_temp_free(dest); } DISAS_INSN(sats) @@ -1085,8 +1311,8 @@ DISAS_INSN(sats) TCGv reg; reg = DREG(insn, 0); gen_flush_flags(s); - gen_helper_sats(reg, reg, QREG_CC_DEST); - gen_logic_cc(s, reg); + gen_helper_sats(reg, reg, QREG_CC_V); + gen_logic_cc(s, reg, OS_LONG); } static void gen_push(DisasContext *s, TCGv val) @@ -1108,8 +1334,7 @@ DISAS_INSN(movem) TCGv tmp; int is_load; - mask = cpu_lduw_code(env, s->pc); - s->pc += 2; + mask = read_im16(env, s); tmp = gen_lea(env, s, insn, OS_LONG); if (IS_NULL_QREG(tmp)) { gen_addr_fault(s); @@ -1152,35 +1377,26 @@ DISAS_INSN(bitop_im) opsize = OS_LONG; op = (insn >> 6) & 3; - bitnum = cpu_lduw_code(env, s->pc); - s->pc += 2; + bitnum = read_im16(env, s); if (bitnum & 0xff00) { disas_undef(env, s, insn); return; } + gen_flush_flags(s); + SRC_EA(env, src1, opsize, 0, op ? &addr: NULL); - gen_flush_flags(s); if (opsize == OS_BYTE) bitnum &= 7; else bitnum &= 31; mask = 1 << bitnum; - tmp = tcg_temp_new(); - assert (CCF_Z == (1 << 2)); - if (bitnum > 2) - tcg_gen_shri_i32(tmp, src1, bitnum - 2); - else if (bitnum < 2) - tcg_gen_shli_i32(tmp, src1, 2 - bitnum); - else - tcg_gen_mov_i32(tmp, src1); - tcg_gen_andi_i32(tmp, tmp, CCF_Z); - /* Clear CCF_Z if bit set. 
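Under the new flag layout none of this shifting into flag-bit position survives: the replacement code simply ANDs the operand with the bit mask straight into QREG_CC_Z. Since the Z convention is now "the flag is set when cc_z holds zero", that single and-immediate produces the correct BTST result, Z set exactly when the tested bit was clear.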
*/ - tcg_gen_ori_i32(QREG_CC_DEST, QREG_CC_DEST, CCF_Z); - tcg_gen_xor_i32(QREG_CC_DEST, QREG_CC_DEST, tmp); + tcg_gen_andi_i32(QREG_CC_Z, src1, mask); + if (op) { + tmp = tcg_temp_new(); switch (op) { case 1: /* bchg */ tcg_gen_xori_i32(tmp, src1, mask); @@ -1195,6 +1411,7 @@ DISAS_INSN(bitop_im) break; } DEST_EA(env, insn, opsize, tmp, &addr); + tcg_temp_free(tmp); } } @@ -1213,35 +1430,33 @@ DISAS_INSN(arith_im) switch (op) { case 0: /* ori */ tcg_gen_ori_i32(dest, src1, im); - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); break; case 1: /* andi */ tcg_gen_andi_i32(dest, src1, im); - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); break; case 2: /* subi */ tcg_gen_mov_i32(dest, src1); - gen_helper_xflag_lt(QREG_CC_X, dest, tcg_const_i32(im)); + tcg_gen_setcondi_i32(TCG_COND_LTU, QREG_CC_X, dest, im); tcg_gen_subi_i32(dest, dest, im); gen_update_cc_add(dest, tcg_const_i32(im)); - s->cc_op = CC_OP_SUB; + set_cc_op(s, CC_OP_SUB); break; case 3: /* addi */ tcg_gen_mov_i32(dest, src1); tcg_gen_addi_i32(dest, dest, im); gen_update_cc_add(dest, tcg_const_i32(im)); - gen_helper_xflag_lt(QREG_CC_X, dest, tcg_const_i32(im)); - s->cc_op = CC_OP_ADD; + tcg_gen_setcondi_i32(TCG_COND_LTU, QREG_CC_X, dest, im); + set_cc_op(s, CC_OP_ADD); break; case 5: /* eori */ tcg_gen_xori_i32(dest, src1, im); - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); break; case 6: /* cmpi */ - tcg_gen_mov_i32(dest, src1); - tcg_gen_subi_i32(dest, dest, im); - gen_update_cc_add(dest, tcg_const_i32(im)); - s->cc_op = CC_OP_SUB; + gen_update_cc_add(src1, tcg_const_i32(im)); + set_cc_op(s, CC_OP_CMP); break; default: abort(); @@ -1292,7 +1507,7 @@ DISAS_INSN(move) dest_ea = ((insn >> 9) & 7) | (op << 3); DEST_EA(env, dest_ea, opsize, src, NULL); /* This will be correct because loads sign extend. */ - gen_logic_cc(s, src); + gen_logic_cc(s, src, opsize); } } @@ -1323,21 +1538,9 @@ DISAS_INSN(clr) { int opsize; - switch ((insn >> 6) & 3) { - case 0: /* clr.b */ - opsize = OS_BYTE; - break; - case 1: /* clr.w */ - opsize = OS_WORD; - break; - case 2: /* clr.l */ - opsize = OS_LONG; - break; - default: - abort(); - } + opsize = insn_opsize(insn); DEST_EA(env, insn, opsize, tcg_const_i32(0), NULL); - gen_logic_cc(s, tcg_const_i32(0)); + gen_logic_cc(s, tcg_const_i32(0), opsize); } static TCGv gen_get_ccr(DisasContext *s) @@ -1345,20 +1548,18 @@ static TCGv gen_get_ccr(DisasContext *s) TCGv dest; gen_flush_flags(s); + update_cc_op(s); dest = tcg_temp_new(); - tcg_gen_shli_i32(dest, QREG_CC_X, 4); - tcg_gen_or_i32(dest, dest, QREG_CC_DEST); + gen_helper_get_ccr(dest, cpu_env); return dest; } DISAS_INSN(move_from_ccr) { - TCGv reg; TCGv ccr; ccr = gen_get_ccr(s); - reg = DREG(insn, 0); - gen_partset_reg(OS_WORD, reg, ccr); + DEST_EA(env, insn, OS_WORD, ccr, NULL); } DISAS_INSN(neg) @@ -1370,50 +1571,45 @@ DISAS_INSN(neg) src1 = tcg_temp_new(); tcg_gen_mov_i32(src1, reg); tcg_gen_neg_i32(reg, src1); - s->cc_op = CC_OP_SUB; gen_update_cc_add(reg, src1); - gen_helper_xflag_lt(QREG_CC_X, tcg_const_i32(0), src1); - s->cc_op = CC_OP_SUB; + tcg_gen_setcondi_i32(TCG_COND_NE, QREG_CC_X, src1, 0); + set_cc_op(s, CC_OP_SUB); } static void gen_set_sr_im(DisasContext *s, uint16_t val, int ccr_only) { - tcg_gen_movi_i32(QREG_CC_DEST, val & 0xf); - tcg_gen_movi_i32(QREG_CC_X, (val & 0x10) >> 4); - if (!ccr_only) { - gen_helper_set_sr(cpu_env, tcg_const_i32(val & 0xff00)); + if (ccr_only) { + tcg_gen_movi_i32(QREG_CC_C, val & CCF_C ? 1 : 0); + tcg_gen_movi_i32(QREG_CC_V, val & CCF_V ? 
-1 : 0); + tcg_gen_movi_i32(QREG_CC_Z, val & CCF_Z ? 0 : 1); + tcg_gen_movi_i32(QREG_CC_N, val & CCF_N ? -1 : 0); + tcg_gen_movi_i32(QREG_CC_X, val & CCF_X ? 1 : 0); + } else { + gen_helper_set_sr(cpu_env, tcg_const_i32(val)); } + set_cc_op(s, CC_OP_FLAGS); } static void gen_set_sr(CPUM68KState *env, DisasContext *s, uint16_t insn, int ccr_only) { - TCGv tmp; - TCGv reg; - - s->cc_op = CC_OP_FLAGS; - if ((insn & 0x38) == 0) - { - tmp = tcg_temp_new(); - reg = DREG(insn, 0); - tcg_gen_andi_i32(QREG_CC_DEST, reg, 0xf); - tcg_gen_shri_i32(tmp, reg, 4); - tcg_gen_andi_i32(QREG_CC_X, tmp, 1); - if (!ccr_only) { - gen_helper_set_sr(cpu_env, reg); + if ((insn & 0x38) == 0) { + if (ccr_only) { + gen_helper_set_ccr(cpu_env, DREG(insn, 0)); + } else { + gen_helper_set_sr(cpu_env, DREG(insn, 0)); } - } - else if ((insn & 0x3f) == 0x3c) - { + set_cc_op(s, CC_OP_FLAGS); + } else if ((insn & 0x3f) == 0x3c) { uint16_t val; - val = cpu_lduw_code(env, s->pc); - s->pc += 2; + val = read_im16(env, s); gen_set_sr_im(s, val, ccr_only); - } - else + } else { disas_undef(env, s, insn); + } } + DISAS_INSN(move_to_ccr) { gen_set_sr(env, s, insn, 1); @@ -1425,7 +1621,7 @@ DISAS_INSN(not) reg = DREG(insn, 0); tcg_gen_not_i32(reg, reg); - gen_logic_cc(s, reg); + gen_logic_cc(s, reg, OS_LONG); } DISAS_INSN(swap) @@ -1440,7 +1636,7 @@ DISAS_INSN(swap) tcg_gen_shli_i32(src1, reg, 16); tcg_gen_shri_i32(src2, reg, 16); tcg_gen_or_i32(reg, src1, src2); - gen_logic_cc(s, reg); + gen_logic_cc(s, reg, OS_LONG); } DISAS_INSN(pea) @@ -1472,7 +1668,7 @@ DISAS_INSN(ext) gen_partset_reg(OS_WORD, reg, tmp); else tcg_gen_mov_i32(reg, tmp); - gen_logic_cc(s, tmp); + gen_logic_cc(s, tmp, OS_LONG); } DISAS_INSN(tst) @@ -1480,21 +1676,9 @@ DISAS_INSN(tst) int opsize; TCGv tmp; - switch ((insn >> 6) & 3) { - case 0: /* tst.b */ - opsize = OS_BYTE; - break; - case 1: /* tst.w */ - opsize = OS_WORD; - break; - case 2: /* tst.l */ - opsize = OS_LONG; - break; - default: - abort(); - } + opsize = insn_opsize(insn); SRC_EA(env, tmp, opsize, 1, NULL); - gen_logic_cc(s, tmp); + gen_logic_cc(s, tmp, opsize); } DISAS_INSN(pulse) @@ -1516,7 +1700,7 @@ DISAS_INSN(tas) dest = tcg_temp_new(); SRC_EA(env, src1, OS_BYTE, 1, &addr); - gen_logic_cc(s, src1); + gen_logic_cc(s, src1, OS_BYTE); tcg_gen_ori_i32(dest, src1, 0x80); DEST_EA(env, insn, OS_BYTE, dest, &addr); } @@ -1530,8 +1714,7 @@ DISAS_INSN(mull) /* The upper 32 bits of the product are discarded, so muls.l and mulu.l are functionally equivalent. */ - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext & 0x87ff) { gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED); return; @@ -1542,7 +1725,7 @@ DISAS_INSN(mull) tcg_gen_mul_i32(dest, src1, reg); tcg_gen_mov_i32(reg, dest); /* Unlike m68k, coldfire always clears the overflow bit. 
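That is, a 680x0 sets V when the signed 32-bit product of MULS.L overflows, but per this comment the ColdFire forms leave V clear; routing the result through gen_logic_cc() models that directly, since a CC_OP_LOGIC flush forces V = C = 0 while deriving N and Z from the value itself.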
*/ - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); } DISAS_INSN(link) @@ -1632,13 +1815,13 @@ DISAS_INSN(addsubq) } else { src2 = tcg_const_i32(val); if (insn & 0x0100) { - gen_helper_xflag_lt(QREG_CC_X, dest, src2); - tcg_gen_subi_i32(dest, dest, val); - s->cc_op = CC_OP_SUB; + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, src2); + tcg_gen_sub_i32(dest, dest, src2); + set_cc_op(s, CC_OP_SUB); } else { - tcg_gen_addi_i32(dest, dest, val); - gen_helper_xflag_lt(QREG_CC_X, dest, src2); - s->cc_op = CC_OP_ADD; + tcg_gen_add_i32(dest, dest, src2); + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, src2); + set_cc_op(s, CC_OP_ADD); } gen_update_cc_add(dest, src2); } @@ -1672,8 +1855,7 @@ DISAS_INSN(branch) op = (insn >> 8) & 0xf; offset = (int8_t)insn; if (offset == 0) { - offset = cpu_ldsw_code(env, s->pc); - s->pc += 2; + offset = (int16_t)read_im16(env, s); } else if (offset == -1) { offset = read_im32(env, s); } @@ -1681,7 +1863,6 @@ DISAS_INSN(branch) /* bsr */ gen_push(s, tcg_const_i32(s->pc)); } - gen_flush_cc_op(s); if (op > 1) { /* Bcc */ l1 = gen_new_label(); @@ -1701,7 +1882,7 @@ DISAS_INSN(moveq) val = (int8_t)insn; tcg_gen_movi_i32(DREG(insn, 9), val); - gen_logic_cc(s, tcg_const_i32(val)); + gen_logic_cc(s, tcg_const_i32(val), OS_LONG); } DISAS_INSN(mvzs) @@ -1717,7 +1898,7 @@ DISAS_INSN(mvzs) SRC_EA(env, src, opsize, (insn & 0x80) == 0, NULL); reg = DREG(insn, 9); tcg_gen_mov_i32(reg, src); - gen_logic_cc(s, src); + gen_logic_cc(s, src, opsize); } DISAS_INSN(or) @@ -1738,7 +1919,7 @@ DISAS_INSN(or) tcg_gen_or_i32(dest, src, reg); tcg_gen_mov_i32(reg, dest); } - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); } DISAS_INSN(suba) @@ -1771,40 +1952,21 @@ DISAS_INSN(mov3q) if (val == 0) val = -1; src = tcg_const_i32(val); - gen_logic_cc(s, src); + gen_logic_cc(s, src, OS_LONG); DEST_EA(env, insn, OS_LONG, src, NULL); } DISAS_INSN(cmp) { - int op; TCGv src; TCGv reg; - TCGv dest; int opsize; - op = (insn >> 6) & 3; - switch (op) { - case 0: /* cmp.b */ - opsize = OS_BYTE; - s->cc_op = CC_OP_CMPB; - break; - case 1: /* cmp.w */ - opsize = OS_WORD; - s->cc_op = CC_OP_CMPW; - break; - case 2: /* cmp.l */ - opsize = OS_LONG; - s->cc_op = CC_OP_SUB; - break; - default: - abort(); - } - SRC_EA(env, src, opsize, 1, NULL); + opsize = insn_opsize(insn); + SRC_EA(env, src, opsize, -1, NULL); reg = DREG(insn, 9); - dest = tcg_temp_new(); - tcg_gen_sub_i32(dest, reg, src); - gen_update_cc_add(dest, src); + gen_update_cc_add(reg, src); + set_cc_op(s, CC_OP_CMP); } DISAS_INSN(cmpa) @@ -1812,7 +1974,6 @@ DISAS_INSN(cmpa) int opsize; TCGv src; TCGv reg; - TCGv dest; if (insn & 0x100) { opsize = OS_LONG; @@ -1821,10 +1982,8 @@ DISAS_INSN(cmpa) } SRC_EA(env, src, opsize, 1, NULL); reg = AREG(insn, 9); - dest = tcg_temp_new(); - tcg_gen_sub_i32(dest, reg, src); - gen_update_cc_add(dest, src); - s->cc_op = CC_OP_SUB; + gen_update_cc_add(reg, src); + set_cc_op(s, CC_OP_CMP); } DISAS_INSN(eor) @@ -1838,7 +1997,7 @@ DISAS_INSN(eor) reg = DREG(insn, 9); dest = tcg_temp_new(); tcg_gen_xor_i32(dest, src, reg); - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); DEST_EA(env, insn, OS_LONG, dest, &addr); } @@ -1860,7 +2019,7 @@ DISAS_INSN(and) tcg_gen_and_i32(dest, src, reg); tcg_gen_mov_i32(reg, dest); } - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); } DISAS_INSN(adda) @@ -1882,7 +2041,6 @@ DISAS_INSN(addx) reg = DREG(insn, 9); src = DREG(insn, 0); gen_helper_addx_cc(reg, cpu_env, reg, src); - s->cc_op = CC_OP_FLAGS; } /* TODO: This could be implemented without 
helper functions. */ @@ -1892,6 +2050,8 @@ DISAS_INSN(shift_im) int tmp; TCGv shift; + set_cc_op(s, CC_OP_FLAGS); + reg = DREG(insn, 0); tmp = (insn >> 9) & 7; if (tmp == 0) @@ -1907,7 +2067,6 @@ DISAS_INSN(shift_im) gen_helper_sar_cc(reg, cpu_env, reg, shift); } } - s->cc_op = CC_OP_SHIFT; } DISAS_INSN(shift_reg) @@ -1917,8 +2076,6 @@ DISAS_INSN(shift_reg) reg = DREG(insn, 0); shift = DREG(insn, 9); - /* Shift by zero leaves C flag unmodified. */ - gen_flush_flags(s); if (insn & 0x100) { gen_helper_shl_cc(reg, cpu_env, reg, shift); } else { @@ -1928,14 +2085,14 @@ DISAS_INSN(shift_reg) gen_helper_sar_cc(reg, cpu_env, reg, shift); } } - s->cc_op = CC_OP_SHIFT; + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(ff1) { TCGv reg; reg = DREG(insn, 0); - gen_logic_cc(s, reg); + gen_logic_cc(s, reg, OS_LONG); gen_helper_ff1(reg, reg); } @@ -1957,14 +2114,12 @@ DISAS_INSN(strldsr) uint32_t addr; addr = s->pc - 2; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext != 0x46FC) { gen_exception(s, addr, EXCP_UNSUPPORTED); return; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (IS_USER(s) || (ext & SR_S) == 0) { gen_exception(s, addr, EXCP_PRIVILEGE); return; @@ -1975,16 +2130,14 @@ DISAS_INSN(strldsr) DISAS_INSN(move_from_sr) { - TCGv reg; TCGv sr; - if (IS_USER(s)) { + if (IS_USER(s) && !m68k_feature(env, M68K_FEATURE_M68000)) { gen_exception(s, s->pc - 2, EXCP_PRIVILEGE); return; } sr = gen_get_sr(s); - reg = DREG(insn, 0); - gen_partset_reg(OS_WORD, reg, sr); + DEST_EA(env, insn, OS_WORD, sr, NULL); } DISAS_INSN(move_to_sr) @@ -2031,8 +2184,7 @@ DISAS_INSN(stop) return; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); gen_set_sr_im(s, ext, 0); tcg_gen_movi_i32(cpu_halted, 1); @@ -2058,8 +2210,7 @@ DISAS_INSN(movec) return; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext & 0x8000) { reg = AREG(ext, 12); @@ -2125,8 +2276,7 @@ DISAS_INSN(fpu) int set_dest; int opsize; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); opmode = ext & 0x7f; switch ((ext >> 13) & 7) { case 0: case 2: @@ -2408,8 +2558,7 @@ DISAS_INSN(fbcc) offset = cpu_ldsw_code(env, s->pc); s->pc += 2; if (insn & (1 << 6)) { - offset = (offset << 16) | cpu_lduw_code(env, s->pc); - s->pc += 2; + offset = (offset << 16) | read_im16(env, s); } l1 = gen_new_label(); @@ -2534,8 +2683,7 @@ DISAS_INSN(mac) s->done_mac = 1; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); acc = ((insn >> 7) & 1) | ((ext >> 3) & 2); dual = ((insn & 0x30) != 0 && (ext & 3) != 0); @@ -2747,9 +2895,11 @@ DISAS_INSN(from_mext) DISAS_INSN(macsr_to_ccr) { - tcg_gen_movi_i32(QREG_CC_X, 0); - tcg_gen_andi_i32(QREG_CC_DEST, QREG_MACSR, 0xf); - s->cc_op = CC_OP_FLAGS; + TCGv tmp = tcg_temp_new(); + tcg_gen_andi_i32(tmp, QREG_MACSR, 0xf); + gen_helper_set_sr(cpu_env, tmp); + tcg_temp_free(tmp); + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(to_mac) @@ -2841,90 +2991,124 @@ register_opcode (disas_proc proc, uint16_t opcode, uint16_t mask) Later insn override earlier ones. */ void register_m68k_insns (CPUM68KState *env) { + /* Build the opcode table only once to avoid + multithreading issues. */ + if (opcode_table[0] != NULL) { + return; + } + + /* use BASE() for instruction available + * for CF_ISA_A and M68000. 
+ */ +#define BASE(name, opcode, mask) \ + register_opcode(disas_##name, 0x##opcode, 0x##mask) #define INSN(name, opcode, mask, feature) do { \ if (m68k_feature(env, M68K_FEATURE_##feature)) \ - register_opcode(disas_##name, 0x##opcode, 0x##mask); \ + BASE(name, opcode, mask); \ } while(0) - INSN(undef, 0000, 0000, CF_ISA_A); + BASE(undef, 0000, 0000); INSN(arith_im, 0080, fff8, CF_ISA_A); + INSN(arith_im, 0000, ff00, M68000); + INSN(undef, 00c0, ffc0, M68000); INSN(bitrev, 00c0, fff8, CF_ISA_APLUSC); - INSN(bitop_reg, 0100, f1c0, CF_ISA_A); - INSN(bitop_reg, 0140, f1c0, CF_ISA_A); - INSN(bitop_reg, 0180, f1c0, CF_ISA_A); - INSN(bitop_reg, 01c0, f1c0, CF_ISA_A); + BASE(bitop_reg, 0100, f1c0); + BASE(bitop_reg, 0140, f1c0); + BASE(bitop_reg, 0180, f1c0); + BASE(bitop_reg, 01c0, f1c0); INSN(arith_im, 0280, fff8, CF_ISA_A); + INSN(arith_im, 0200, ff00, M68000); + INSN(undef, 02c0, ffc0, M68000); INSN(byterev, 02c0, fff8, CF_ISA_APLUSC); INSN(arith_im, 0480, fff8, CF_ISA_A); + INSN(arith_im, 0400, ff00, M68000); + INSN(undef, 04c0, ffc0, M68000); + INSN(arith_im, 0600, ff00, M68000); + INSN(undef, 06c0, ffc0, M68000); INSN(ff1, 04c0, fff8, CF_ISA_APLUSC); INSN(arith_im, 0680, fff8, CF_ISA_A); - INSN(bitop_im, 0800, ffc0, CF_ISA_A); - INSN(bitop_im, 0840, ffc0, CF_ISA_A); - INSN(bitop_im, 0880, ffc0, CF_ISA_A); - INSN(bitop_im, 08c0, ffc0, CF_ISA_A); - INSN(arith_im, 0a80, fff8, CF_ISA_A); INSN(arith_im, 0c00, ff38, CF_ISA_A); - INSN(move, 1000, f000, CF_ISA_A); - INSN(move, 2000, f000, CF_ISA_A); - INSN(move, 3000, f000, CF_ISA_A); + INSN(arith_im, 0c00, ff00, M68000); + BASE(bitop_im, 0800, ffc0); + BASE(bitop_im, 0840, ffc0); + BASE(bitop_im, 0880, ffc0); + BASE(bitop_im, 08c0, ffc0); + INSN(arith_im, 0a80, fff8, CF_ISA_A); + INSN(arith_im, 0a00, ff00, M68000); + BASE(move, 1000, f000); + BASE(move, 2000, f000); + BASE(move, 3000, f000); INSN(strldsr, 40e7, ffff, CF_ISA_APLUSC); INSN(negx, 4080, fff8, CF_ISA_A); INSN(move_from_sr, 40c0, fff8, CF_ISA_A); - INSN(lea, 41c0, f1c0, CF_ISA_A); - INSN(clr, 4200, ff00, CF_ISA_A); - INSN(undef, 42c0, ffc0, CF_ISA_A); + INSN(move_from_sr, 40c0, ffc0, M68000); + BASE(lea, 41c0, f1c0); + BASE(clr, 4200, ff00); + BASE(undef, 42c0, ffc0); INSN(move_from_ccr, 42c0, fff8, CF_ISA_A); + INSN(move_from_ccr, 42c0, ffc0, M68000); INSN(neg, 4480, fff8, CF_ISA_A); - INSN(move_to_ccr, 44c0, ffc0, CF_ISA_A); + INSN(neg, 4400, ff00, M68000); + INSN(undef, 44c0, ffc0, M68000); + BASE(move_to_ccr, 44c0, ffc0); INSN(not, 4680, fff8, CF_ISA_A); + INSN(not, 4600, ff00, M68000); + INSN(undef, 46c0, ffc0, M68000); INSN(move_to_sr, 46c0, ffc0, CF_ISA_A); - INSN(pea, 4840, ffc0, CF_ISA_A); - INSN(swap, 4840, fff8, CF_ISA_A); - INSN(movem, 48c0, fbc0, CF_ISA_A); - INSN(ext, 4880, fff8, CF_ISA_A); - INSN(ext, 48c0, fff8, CF_ISA_A); - INSN(ext, 49c0, fff8, CF_ISA_A); - INSN(tst, 4a00, ff00, CF_ISA_A); + BASE(pea, 4840, ffc0); + BASE(swap, 4840, fff8); + BASE(movem, 48c0, fbc0); + BASE(ext, 4880, fff8); + BASE(ext, 48c0, fff8); + BASE(ext, 49c0, fff8); + BASE(tst, 4a00, ff00); INSN(tas, 4ac0, ffc0, CF_ISA_B); + INSN(tas, 4ac0, ffc0, M68000); INSN(halt, 4ac8, ffff, CF_ISA_A); INSN(pulse, 4acc, ffff, CF_ISA_A); - INSN(illegal, 4afc, ffff, CF_ISA_A); + BASE(illegal, 4afc, ffff); INSN(mull, 4c00, ffc0, CF_ISA_A); + INSN(mull, 4c00, ffc0, LONG_MULDIV); INSN(divl, 4c40, ffc0, CF_ISA_A); + INSN(divl, 4c40, ffc0, LONG_MULDIV); INSN(sats, 4c80, fff8, CF_ISA_B); - INSN(trap, 4e40, fff0, CF_ISA_A); - INSN(link, 4e50, fff8, CF_ISA_A); - INSN(unlk, 4e58, fff8, CF_ISA_A); + BASE(trap, 4e40, 
fff0); + BASE(link, 4e50, fff8); + BASE(unlk, 4e58, fff8); INSN(move_to_usp, 4e60, fff8, USP); INSN(move_from_usp, 4e68, fff8, USP); - INSN(nop, 4e71, ffff, CF_ISA_A); - INSN(stop, 4e72, ffff, CF_ISA_A); - INSN(rte, 4e73, ffff, CF_ISA_A); - INSN(rts, 4e75, ffff, CF_ISA_A); + BASE(nop, 4e71, ffff); + BASE(stop, 4e72, ffff); + BASE(rte, 4e73, ffff); + BASE(rts, 4e75, ffff); INSN(movec, 4e7b, ffff, CF_ISA_A); - INSN(jump, 4e80, ffc0, CF_ISA_A); + BASE(jump, 4e80, ffc0); INSN(jump, 4ec0, ffc0, CF_ISA_A); INSN(addsubq, 5180, f1c0, CF_ISA_A); + INSN(jump, 4ec0, ffc0, M68000); + INSN(addsubq, 5000, f080, M68000); + INSN(addsubq, 5080, f0c0, M68000); INSN(scc, 50c0, f0f8, CF_ISA_A); INSN(addsubq, 5080, f1c0, CF_ISA_A); INSN(tpf, 51f8, fff8, CF_ISA_A); /* Branch instructions. */ - INSN(branch, 6000, f000, CF_ISA_A); + BASE(branch, 6000, f000); /* Disable long branch instructions, then add back the ones we want. */ - INSN(undef, 60ff, f0ff, CF_ISA_A); /* All long branches. */ + BASE(undef, 60ff, f0ff); /* All long branches. */ INSN(branch, 60ff, f0ff, CF_ISA_B); INSN(undef, 60ff, ffff, CF_ISA_B); /* bra.l */ INSN(branch, 60ff, ffff, BRAL); + INSN(branch, 60ff, f0ff, BCCL); - INSN(moveq, 7000, f100, CF_ISA_A); + BASE(moveq, 7000, f100); INSN(mvzs, 7100, f100, CF_ISA_B); - INSN(or, 8000, f000, CF_ISA_A); - INSN(divw, 80c0, f0c0, CF_ISA_A); - INSN(addsub, 9000, f000, CF_ISA_A); + BASE(or, 8000, f000); + BASE(divw, 80c0, f0c0); + BASE(addsub, 9000, f000); INSN(subx, 9180, f1f8, CF_ISA_A); INSN(suba, 91c0, f1c0, CF_ISA_A); - INSN(undef_mac, a000, f000, CF_ISA_A); + BASE(undef_mac, a000, f000); INSN(mac, a000, f100, CF_EMAC); INSN(from_mac, a180, f9b0, CF_EMAC); INSN(move_mac, a110, f9fc, CF_EMAC); @@ -2943,12 +3127,16 @@ void register_m68k_insns (CPUM68KState *env) INSN(cmpa, b0c0, f1c0, CF_ISA_B); /* cmpa.w */ INSN(cmp, b080, f1c0, CF_ISA_A); INSN(cmpa, b1c0, f1c0, CF_ISA_A); + INSN(cmp, b000, f100, M68000); + INSN(eor, b100, f100, M68000); + INSN(cmpa, b0c0, f0c0, M68000); INSN(eor, b180, f1c0, CF_ISA_A); - INSN(and, c000, f000, CF_ISA_A); - INSN(mulw, c0c0, f0c0, CF_ISA_A); - INSN(addsub, d000, f000, CF_ISA_A); + BASE(and, c000, f000); + BASE(mulw, c0c0, f0c0); + BASE(addsub, d000, f000); INSN(addx, d180, f1f8, CF_ISA_A); INSN(adda, d1c0, f1c0, CF_ISA_A); + INSN(adda, d0c0, f0c0, M68000); INSN(shift_im, e080, f0f0, CF_ISA_A); INSN(shift_reg, e0a0, f0f0, CF_ISA_A); INSN(undef_fpu, f000, f000, CF_ISA_A); @@ -2969,8 +3157,7 @@ static void disas_m68k_insn(CPUM68KState * env, DisasContext *s) { uint16_t insn; - insn = cpu_lduw_code(env, s->pc); - s->pc += 2; + insn = read_im16(env, s); opcode_table[insn](env, s, insn); } @@ -2995,6 +3182,7 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) dc->is_jmp = DISAS_NEXT; dc->pc = pc_start; dc->cc_op = CC_OP_DYNAMIC; + dc->cc_op_synced = 1; dc->singlestep_enabled = cs->singlestep_enabled; dc->fpcr = env->fpcr; dc->user = (env->sr & SR_S) == 0; @@ -3012,7 +3200,7 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) do { pc_offset = dc->pc - pc_start; gen_throws_exception = NULL; - tcg_gen_insn_start(dc->pc); + tcg_gen_insn_start(dc->pc, dc->cc_op); num_insns++; if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { @@ -3043,20 +3231,20 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) if (unlikely(cs->singlestep_enabled)) { /* Make sure the pc is updated, and raise a debug exception. 
*/ if (!dc->is_jmp) { - gen_flush_cc_op(dc); + update_cc_op(dc); tcg_gen_movi_i32(QREG_PC, dc->pc); } gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG)); } else { switch(dc->is_jmp) { case DISAS_NEXT: - gen_flush_cc_op(dc); + update_cc_op(dc); gen_jmp_tb(dc, 0, dc->pc); break; default: case DISAS_JUMP: case DISAS_UPDATE: - gen_flush_cc_op(dc); + update_cc_op(dc); /* indicate that the hash table must be used to find the next TB */ tcg_gen_exit_tb(0); break; @@ -3091,20 +3279,24 @@ void m68k_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, for (i = 0; i < 8; i++) { u.d = env->fregs[i]; - cpu_fprintf (f, "D%d = %08x A%d = %08x F%d = %08x%08x (%12g)\n", - i, env->dregs[i], i, env->aregs[i], - i, u.l.upper, u.l.lower, *(double *)&u.d); + cpu_fprintf(f, "D%d = %08x A%d = %08x F%d = %08x%08x (%12g)\n", + i, env->dregs[i], i, env->aregs[i], + i, u.l.upper, u.l.lower, *(double *)&u.d); } cpu_fprintf (f, "PC = %08x ", env->pc); - sr = env->sr; - cpu_fprintf (f, "SR = %04x %c%c%c%c%c ", sr, (sr & 0x10) ? 'X' : '-', - (sr & CCF_N) ? 'N' : '-', (sr & CCF_Z) ? 'Z' : '-', - (sr & CCF_V) ? 'V' : '-', (sr & CCF_C) ? 'C' : '-'); + sr = env->sr | cpu_m68k_get_ccr(env); + cpu_fprintf(f, "SR = %04x %c%c%c%c%c ", sr, (sr & CCF_X) ? 'X' : '-', + (sr & CCF_N) ? 'N' : '-', (sr & CCF_Z) ? 'Z' : '-', + (sr & CCF_V) ? 'V' : '-', (sr & CCF_C) ? 'C' : '-'); cpu_fprintf (f, "FPRESULT = %12g\n", *(double *)&env->fp_result); } void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb, target_ulong *data) { + int cc_op = data[1]; env->pc = data[0]; + if (cc_op != CC_OP_DYNAMIC) { + env->cc_op = cc_op; + } } diff --git a/tcg-runtime.c b/tcg-runtime.c index ea2ad649cb..9327b6f23b 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -23,17 +23,10 @@ */ #include "qemu/osdep.h" #include "qemu/host-utils.h" - -/* This file is compiled once, and thus we can't include the standard - "exec/helper-proto.h", which has includes that are target specific. */ - -#include "exec/helper-head.h" - -#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ - dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)); - -#include "tcg-runtime.h" - +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" /* 32-bit helpers */ @@ -107,3 +100,62 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2) muls64(&l, &h, arg1, arg2); return h; } + +void HELPER(exit_atomic)(CPUArchState *env) +{ + cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); +} + +#ifndef CONFIG_SOFTMMU +/* The softmmu versions of these helpers are in cputlb.c. */ + +/* Do not allow unaligned operations to proceed. Return the host address. */ +static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, + int size, uintptr_t retaddr) +{ + /* Enforce qemu required alignment. */ + if (unlikely(addr & (size - 1))) { + cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr); + } + return g2h(addr); +} + +/* Macro to call the above, with local variables from the use context. */ +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) + +#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) +#define EXTRA_ARGS + +#define DATA_SIZE 1 +#include "atomic_template.h" + +#define DATA_SIZE 2 +#include "atomic_template.h" + +#define DATA_SIZE 4 +#include "atomic_template.h" + +#ifdef CONFIG_ATOMIC64 +#define DATA_SIZE 8 +#include "atomic_template.h" +#endif + +/* The following is only callable from other helpers, and matches up + with the softmmu version. 
*/ + +#ifdef CONFIG_ATOMIC128 + +#undef EXTRA_ARGS +#undef ATOMIC_NAME +#undef ATOMIC_MMU_LOOKUP + +#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr +#define ATOMIC_NAME(X) \ + HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) + +#define DATA_SIZE 16 +#include "atomic_template.h" +#endif /* CONFIG_ATOMIC128 */ + +#endif /* !CONFIG_SOFTMMU */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 291d50bb7d..bb2bfeef3c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -150,17 +150,7 @@ void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, void tcg_gen_mb(TCGBar mb_type) { - bool emit_barriers = true; - -#ifndef CONFIG_USER_ONLY - /* TODO: When MTTCG is available for system mode, we will check - * the following condition and enable emit_barriers - * (qemu_tcg_mttcg_enabled() && smp_cpus > 1) - */ - emit_barriers = false; -#endif - - if (emit_barriers) { + if (parallel_cpus) { tcg_gen_op1(&tcg_ctx, INDEX_op_mb, mb_type); } } @@ -1975,3 +1965,345 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) addr, trace_mem_get_info(memop, 1)); gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); } + +static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc) +{ + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_gen_ext8s_i32(ret, val); + break; + case MO_UB: + tcg_gen_ext8u_i32(ret, val); + break; + case MO_SW: + tcg_gen_ext16s_i32(ret, val); + break; + case MO_UW: + tcg_gen_ext16u_i32(ret, val); + break; + default: + tcg_gen_mov_i32(ret, val); + break; + } +} + +static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, TCGMemOp opc) +{ + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_gen_ext8s_i64(ret, val); + break; + case MO_UB: + tcg_gen_ext8u_i64(ret, val); + break; + case MO_SW: + tcg_gen_ext16s_i64(ret, val); + break; + case MO_UW: + tcg_gen_ext16u_i64(ret, val); + break; + case MO_SL: + tcg_gen_ext32s_i64(ret, val); + break; + case MO_UL: + tcg_gen_ext32u_i64(ret, val); + break; + default: + tcg_gen_mov_i64(ret, val); + break; + } +} + +#ifdef CONFIG_SOFTMMU +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, + TCGv_i32, TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, + TCGv_i64, TCGv_i64, TCGv_i32); +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, + TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, + TCGv_i64, TCGv_i32); +#else +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64); +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32); +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64); +#endif + +#ifdef CONFIG_ATOMIC64 +# define WITH_ATOMIC64(X) X, +#else +# define WITH_ATOMIC64(X) +#endif + +static void * const table_cmpxchg[16] = { + [MO_8] = gen_helper_atomic_cmpxchgb, + [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le, + [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be, + [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le, + [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be, + WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le) + WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be) +}; + +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, + TCGv_i32 newv, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 0, 0); + + if (!parallel_cpus) { + TCGv_i32 t1 = 
tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + tcg_temp_free_i32(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, t1, memop); + } else { + tcg_gen_mov_i32(retv, t1); + } + tcg_temp_free_i32(t1); + } else { + gen_atomic_cx_i32 gen; + + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi); + tcg_temp_free_i32(oi); + } +#else + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv); +#endif + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, retv, memop); + } + } +} + +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, + TCGv_i64 newv, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 1, 0); + + if (!parallel_cpus) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + tcg_temp_free_i64(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, t1, memop); + } else { + tcg_gen_mov_i64(retv, t1); + } + tcg_temp_free_i64(t1); + } else if ((memop & MO_SIZE) == MO_64) { +#ifdef CONFIG_ATOMIC64 + gen_atomic_cx_i64 gen; + + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx)); + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi); + tcg_temp_free_i32(oi); + } +#else + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv); +#endif +#else + gen_helper_exit_atomic(tcg_ctx.tcg_env); +#endif /* CONFIG_ATOMIC64 */ + } else { + TCGv_i32 c32 = tcg_temp_new_i32(); + TCGv_i32 n32 = tcg_temp_new_i32(); + TCGv_i32 r32 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(c32, cmpv); + tcg_gen_extrl_i64_i32(n32, newv); + tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN); + tcg_temp_free_i32(c32); + tcg_temp_free_i32(n32); + + tcg_gen_extu_i32_i64(retv, r32); + tcg_temp_free_i32(r32); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, retv, memop); + } + } +} + +static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, + TCGArg idx, TCGMemOp memop, bool new_val, + void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32)) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + memop = tcg_canonicalize_memop(memop, 0, 0); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + gen(t2, t1, val); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + + tcg_gen_ext_i32(ret, (new_val ? 
t2 : t1), memop); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + +static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, + TCGArg idx, TCGMemOp memop, void * const table[]) +{ + gen_atomic_op_i32 gen; + + memop = tcg_canonicalize_memop(memop, 0, 0); + + gen = table[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(ret, tcg_ctx.tcg_env, addr, val, oi); + tcg_temp_free_i32(oi); + } +#else + gen(ret, tcg_ctx.tcg_env, addr, val); +#endif + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(ret, ret, memop); + } +} + +static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, + TCGArg idx, TCGMemOp memop, bool new_val, + void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + memop = tcg_canonicalize_memop(memop, 1, 0); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + gen(t2, t1, val); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + + tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} + +static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, + TCGArg idx, TCGMemOp memop, void * const table[]) +{ + memop = tcg_canonicalize_memop(memop, 1, 0); + + if ((memop & MO_SIZE) == MO_64) { +#ifdef CONFIG_ATOMIC64 + gen_atomic_op_i64 gen; + + gen = table[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(ret, tcg_ctx.tcg_env, addr, val, oi); + tcg_temp_free_i32(oi); + } +#else + gen(ret, tcg_ctx.tcg_env, addr, val); +#endif +#else + gen_helper_exit_atomic(tcg_ctx.tcg_env); +#endif /* CONFIG_ATOMIC64 */ + } else { + TCGv_i32 v32 = tcg_temp_new_i32(); + TCGv_i32 r32 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(v32, val); + do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table); + tcg_temp_free_i32(v32); + + tcg_gen_extu_i32_i64(ret, r32); + tcg_temp_free_i32(r32); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(ret, ret, memop); + } + } +} + +#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \ +static void * const table_##NAME[16] = { \ + [MO_8] = gen_helper_atomic_##NAME##b, \ + [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \ + [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \ + [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \ + [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \ + WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \ + WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \ +}; \ +void tcg_gen_atomic_##NAME##_i32 \ + (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, TCGMemOp memop) \ +{ \ + if (parallel_cpus) { \ + do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \ + } else { \ + do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \ + tcg_gen_##OP##_i32); \ + } \ +} \ +void tcg_gen_atomic_##NAME##_i64 \ + (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, TCGMemOp memop) \ +{ \ + if (parallel_cpus) { \ + do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \ + } else { \ + do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \ + tcg_gen_##OP##_i64); \ + } \ +} + +GEN_ATOMIC_HELPER(fetch_add, add, 0) +GEN_ATOMIC_HELPER(fetch_and, and, 0) +GEN_ATOMIC_HELPER(fetch_or, or, 0) +GEN_ATOMIC_HELPER(fetch_xor, xor, 0) + +GEN_ATOMIC_HELPER(add_fetch, add, 1) +GEN_ATOMIC_HELPER(and_fetch, and, 1) +GEN_ATOMIC_HELPER(or_fetch, or, 1) 
+GEN_ATOMIC_HELPER(xor_fetch, xor, 1) + +static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mov_i32(r, b); +} + +static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mov_i64(r, b); +} + +GEN_ATOMIC_HELPER(xchg, mov2, 0) + +#undef GEN_ATOMIC_HELPER diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 02cb376681..89b59e867a 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -854,6 +854,30 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ); } +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32, + TCGArg, TCGMemOp); +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, + TCGArg, TCGMemOp); + +void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); + #if TARGET_LONG_BITS == 64 #define tcg_gen_movi_tl tcg_gen_movi_i64 #define tcg_gen_mov_tl tcg_gen_mov_i64 @@ -932,6 +956,16 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_sub2_tl tcg_gen_sub2_i64 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 #define tcg_gen_muls2_tl tcg_gen_muls2_i64 +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64 +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64 +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64 +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64 +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64 +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64 +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64 +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64 +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64 +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -1009,6 +1043,16 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_sub2_tl tcg_gen_sub2_i32 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 #define tcg_gen_muls2_tl tcg_gen_muls2_i32 +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32 +#define 
tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32 +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32 +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32 +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32 +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32 +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32 +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32 +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32 +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32 #endif #if UINTPTR_MAX == UINT32_MAX diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 23a0c37711..1deb86a099 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -14,3 +14,112 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) + +DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) + +#ifdef CONFIG_SOFTMMU + +DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +#ifdef CONFIG_ATOMIC64 +DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG, + i64, env, tl, i64, i64, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG, + i64, env, tl, i64, i64, i32) +#endif + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \ + TCG_CALL_NO_WG, i64, env, tl, i64, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \ + TCG_CALL_NO_WG, i64, env, tl, i64, i32) +#else +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) +#endif /* CONFIG_ATOMIC64 */ + +#else + +DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +#ifdef CONFIG_ATOMIC64 +DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64) 
+DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64) +#endif + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le), \ + TCG_CALL_NO_WG, i64, env, tl, i64) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be), \ + TCG_CALL_NO_WG, i64, env, tl, i64) +#else +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) +#endif /* CONFIG_ATOMIC64 */ + +#endif /* CONFIG_SOFTMMU */ + +GEN_ATOMIC_HELPERS(fetch_add) +GEN_ATOMIC_HELPERS(fetch_and) +GEN_ATOMIC_HELPERS(fetch_or) +GEN_ATOMIC_HELPERS(fetch_xor) + +GEN_ATOMIC_HELPERS(add_fetch) +GEN_ATOMIC_HELPERS(and_fetch) +GEN_ATOMIC_HELPERS(or_fetch) +GEN_ATOMIC_HELPERS(xor_fetch) + +GEN_ATOMIC_HELPERS(xchg) + +#undef GEN_ATOMIC_HELPERS diff --git a/tcg/tcg.h b/tcg/tcg.h index c9949aa79b..b34b5fbc2f 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -704,6 +704,7 @@ struct TCGContext { }; extern TCGContext tcg_ctx; +extern bool parallel_cpus; static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v) { @@ -1176,6 +1177,90 @@ uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, # define helper_ret_ldq_cmmu helper_le_ldq_cmmu #endif +uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ +TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \ + (CPUArchState *env, target_ulong addr, TYPE val, \ + TCGMemOpIdx oi, uintptr_t retaddr); + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, 
uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#else +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +#endif + +GEN_ATOMIC_HELPER_ALL(fetch_add) +GEN_ATOMIC_HELPER_ALL(fetch_sub) +GEN_ATOMIC_HELPER_ALL(fetch_and) +GEN_ATOMIC_HELPER_ALL(fetch_or) +GEN_ATOMIC_HELPER_ALL(fetch_xor) + +GEN_ATOMIC_HELPER_ALL(add_fetch) +GEN_ATOMIC_HELPER_ALL(sub_fetch) +GEN_ATOMIC_HELPER_ALL(and_fetch) +GEN_ATOMIC_HELPER_ALL(or_fetch) +GEN_ATOMIC_HELPER_ALL(xor_fetch) + +GEN_ATOMIC_HELPER_ALL(xchg) + +#undef GEN_ATOMIC_HELPER_ALL +#undef GEN_ATOMIC_HELPER #endif /* CONFIG_SOFTMMU */ +#ifdef CONFIG_ATOMIC128 +#include "qemu/int128.h" + +/* These aren't really a "proper" helpers because TCG cannot manage Int128. + However, use the same format as the others, for use by the backends. */ +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); + +#endif /* CONFIG_ATOMIC128 */ + #endif /* TCG_H */ diff --git a/tests/.gitignore b/tests/.gitignore index 4aec8bc5fa..64e050e859 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,3 +1,4 @@ +atomic_add-bench check-qdict check-qfloat check-qint diff --git a/tests/Makefile.include b/tests/Makefile.include index cd058efc14..22656eaf26 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -460,7 +460,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-opts-visitor.o tests/test-qmp-event.o \ tests/rcutorture.o tests/test-rcu-list.o \ tests/test-qdist.o \ - tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o + tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \ + tests/atomic_add-bench.o $(test-obj-y): QEMU_INCLUDES += -Itests QEMU_CFLAGS += -I$(SRC_PATH)/tests @@ -507,6 +508,7 @@ tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y) tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y) tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y) tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y) +tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y) tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c new file mode 100644 index 0000000000..caa1e8e689 --- /dev/null +++ b/tests/atomic_add-bench.c @@ -0,0 +1,163 @@ +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/host-utils.h" +#include "qemu/processor.h" + +struct thread_info { + uint64_t r; +} QEMU_ALIGNED(64); + +struct count { 
+ unsigned long val; +} QEMU_ALIGNED(64); + +static QemuThread *threads; +static struct thread_info *th_info; +static unsigned int n_threads = 1; +static unsigned int n_ready_threads; +static struct count *counts; +static unsigned int duration = 1; +static unsigned int range = 1024; +static bool test_start; +static bool test_stop; + +static const char commands_string[] = + " -n = number of threads\n" + " -d = duration in seconds\n" + " -r = range (will be rounded up to pow2)"; + +static void usage_complete(char *argv[]) +{ + fprintf(stderr, "Usage: %s [options]\n", argv[0]); + fprintf(stderr, "options:\n%s\n", commands_string); +} + +/* + * From: https://en.wikipedia.org/wiki/Xorshift + * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only + * guaranteed to be >= INT_MAX). + */ +static uint64_t xorshift64star(uint64_t x) +{ + x ^= x >> 12; /* a */ + x ^= x << 25; /* b */ + x ^= x >> 27; /* c */ + return x * UINT64_C(2685821657736338717); +} + +static void *thread_func(void *arg) +{ + struct thread_info *info = arg; + + atomic_inc(&n_ready_threads); + while (!atomic_read(&test_start)) { + cpu_relax(); + } + + while (!atomic_read(&test_stop)) { + unsigned int index; + + info->r = xorshift64star(info->r); + index = info->r & (range - 1); + atomic_inc(&counts[index].val); + } + return NULL; +} + +static void run_test(void) +{ + unsigned int remaining; + unsigned int i; + + while (atomic_read(&n_ready_threads) != n_threads) { + cpu_relax(); + } + atomic_set(&test_start, true); + do { + remaining = sleep(duration); + } while (remaining); + atomic_set(&test_stop, true); + + for (i = 0; i < n_threads; i++) { + qemu_thread_join(&threads[i]); + } +} + +static void create_threads(void) +{ + unsigned int i; + + threads = g_new(QemuThread, n_threads); + th_info = g_new(struct thread_info, n_threads); + counts = qemu_memalign(64, sizeof(*counts) * range); + memset(counts, 0, sizeof(*counts) * range); + + for (i = 0; i < n_threads; i++) { + struct thread_info *info = &th_info[i]; + + info->r = (i + 1) ^ time(NULL); + qemu_thread_create(&threads[i], NULL, thread_func, info, + QEMU_THREAD_JOINABLE); + } +} + +static void pr_params(void) +{ + printf("Parameters:\n"); + printf(" # of threads: %u\n", n_threads); + printf(" duration: %u\n", duration); + printf(" ops' range: %u\n", range); +} + +static void pr_stats(void) +{ + unsigned long long val = 0; + unsigned int i; + double tx; + + for (i = 0; i < range; i++) { + val += counts[i].val; + } + tx = val / duration / 1e6; + + printf("Results:\n"); + printf("Duration: %u s\n", duration); + printf(" Throughput: %.2f Mops/s\n", tx); + printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads); +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + + for (;;) { + c = getopt(argc, argv, "hd:n:r:"); + if (c < 0) { + break; + } + switch (c) { + case 'h': + usage_complete(argv); + exit(0); + case 'd': + duration = atoi(optarg); + break; + case 'n': + n_threads = atoi(optarg); + break; + case 'r': + range = pow2ceil(atoi(optarg)); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + parse_args(argc, argv); + pr_params(); + create_threads(); + run_test(); + pr_stats(); + return 0; +} diff --git a/tests/test-int128.c b/tests/test-int128.c index 4390123bd3..b86a3c76e6 100644 --- a/tests/test-int128.c +++ b/tests/test-int128.c @@ -41,7 +41,7 @@ static Int128 expand(uint32_t x) uint64_t l, h; l = expand16(x & 65535); h = expand16(x >> 16); - return (Int128) {l, h}; + return (Int128) int128_make128(l, h); }; static void 
test_and(void) @@ -54,8 +54,8 @@ static void test_and(void) Int128 b = expand(tests[j]); Int128 r = expand(tests[i] & tests[j]); Int128 s = int128_and(a, b); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } } @@ -70,8 +70,8 @@ static void test_add(void) Int128 b = expand(tests[j]); Int128 r = expand(tests[i] + tests[j]); Int128 s = int128_add(a, b); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } } @@ -86,8 +86,8 @@ static void test_sub(void) Int128 b = expand(tests[j]); Int128 r = expand(tests[i] - tests[j]); Int128 s = int128_sub(a, b); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } } @@ -100,8 +100,8 @@ static void test_neg(void) Int128 a = expand(tests[i]); Int128 r = expand(-tests[i]); Int128 s = int128_neg(a); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } @@ -180,8 +180,8 @@ test_rshift_one(uint32_t x, int n, uint64_t h, uint64_t l) { Int128 a = expand(x); Int128 r = int128_rshift(a, n); - g_assert_cmpuint(r.lo, ==, l); - g_assert_cmpuint(r.hi, ==, h); + g_assert_cmpuint(int128_getlo(r), ==, l); + g_assert_cmpuint(int128_gethi(r), ==, h); } static void test_rshift(void) diff --git a/translate-all.c b/translate-all.c index 86b45a19c5..76fc18c98f 100644 --- a/translate-all.c +++ b/translate-all.c @@ -118,6 +118,7 @@ static void *l1_map[V_L1_MAX_SIZE]; /* code generation context */ TCGContext tcg_ctx; +bool parallel_cpus; /* translation block context */ #ifdef CONFIG_USER_ONLY |
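The m68k translator changes above (set_cc_op(), CC_OP_CMP, gen_logic_cc() taking an opsize) all serve one idea: stop computing N/Z/V/C eagerly after every instruction and instead record which operation last produced the flags plus its operands, materializing the flags only when a conditional actually consumes them. A minimal standalone model of that scheme, plain C rather than TCG; cc_op, cc_n and cc_v are this sketch's own variables, named to echo the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Which operation last set the flags. */
    enum cc_op { CC_OP_LOGIC, CC_OP_CMP };

    static enum cc_op cc_op;
    static uint32_t cc_n;   /* result (LOGIC) or left operand (CMP) */
    static uint32_t cc_v;   /* right operand (CMP only) */

    /* The per-instruction cost is just these stores; no flag math. */
    static void set_cc_logic(uint32_t result)
    {
        cc_op = CC_OP_LOGIC;
        cc_n = result;
    }

    static void set_cc_cmp(uint32_t dst, uint32_t src)
    {
        cc_op = CC_OP_CMP;
        cc_n = dst;
        cc_v = src;
    }

    /* Flags are derived on demand, only when a Bcc/Scc reads them. */
    static int flag_z(void)
    {
        return cc_op == CC_OP_CMP ? cc_n == cc_v : cc_n == 0;
    }

    static int flag_c(void)
    {
        return cc_op == CC_OP_CMP ? cc_n < cc_v : 0;  /* logic ops clear C */
    }

    int main(void)
    {
        set_cc_cmp(3, 5);                 /* e.g. cmp.l #5,%d0 with %d0 = 3 */
        printf("Z=%d C=%d\n", flag_z(), flag_c());    /* Z=0 C=1 */

        set_cc_logic(0);                  /* e.g. moveq #0,%d1 */
        printf("Z=%d C=%d\n", flag_z(), flag_c());    /* Z=1 C=0 */
        return 0;
    }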
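register_m68k_insns() above now guards against being run twice (the opcode_table[0] != NULL early return) and layers unconditional BASE() entries under feature-gated INSN() refinements. The table behind it is a flat 64K array of function pointers filled by mask/pattern expansion, so later registrations deliberately overwrite earlier, broader ones. A self-contained sketch of that dispatch, simplified from the real register_opcode(), with toy handlers standing in for the disas_* routines:

    #include <stdint.h>
    #include <stdio.h>

    typedef void (*disas_proc)(uint16_t insn);

    static disas_proc opcode_table[0x10000];

    static void disas_undef(uint16_t insn) { printf("undef %04x\n", insn); }
    static void disas_moveq(uint16_t insn) { printf("moveq %04x\n", insn); }

    /* Every 16-bit pattern with (i & mask) == opcode gets 'proc'.
       Later calls overwrite earlier ones, which is why catch-all and
       base patterns are registered first and refinements last. */
    static void register_opcode(disas_proc proc, uint16_t opcode,
                                uint16_t mask)
    {
        uint32_t i;

        for (i = 0; i < 0x10000; i++) {
            if ((i & mask) == opcode) {
                opcode_table[i] = proc;
            }
        }
    }

    int main(void)
    {
        register_opcode(disas_undef, 0x0000, 0x0000);  /* everything */
        register_opcode(disas_moveq, 0x7000, 0xf100);  /* refine: moveq */

        opcode_table[0x7042](0x7042);   /* dispatches to moveq */
        opcode_table[0x4afb](0x4afb);   /* still undefined */
        return 0;
    }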
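In the tcg-runtime.c hunk above, the user-mode atomic_mmu_lookup() refuses unaligned addresses via cpu_loop_exit_atomic(), which restarts the instruction serialized against all other vCPUs; aligned accesses run a real host atomic on the g2h()-translated pointer. A rough standalone model of that fast path, assuming the GCC/Clang __atomic builtins that QEMU's atomic_cmpxchg__nocheck() wraps; guest_ram, this g2h() and the exit() fallback are stand-ins for this sketch only:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static uint32_t guest_ram[64];     /* stand-in for the guest mapping */

    /* Stand-in for g2h(): guest address -> host pointer. */
    static void *g2h(uintptr_t guest_addr)
    {
        return (char *)guest_ram + guest_addr;
    }

    static uint32_t atomic_cmpxchgl(uintptr_t addr, uint32_t cmpv,
                                    uint32_t newv)
    {
        uint32_t *haddr;

        if (addr & 3) {
            /* Real code: cpu_loop_exit_atomic(), retry under the
               exclusive section instead of aborting. */
            fprintf(stderr, "unaligned atomic at %#lx\n",
                    (unsigned long)addr);
            exit(EXIT_FAILURE);
        }
        haddr = g2h(addr);
        /* On success *haddr becomes newv; either way cmpv is loaded
           with the old value, which is what the helper returns. */
        __atomic_compare_exchange_n(haddr, &cmpv, newv, false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
        return cmpv;
    }

    int main(void)
    {
        guest_ram[1] = 41;
        uint32_t old = atomic_cmpxchgl(4, 41, 42);     /* matches: swaps */
        printf("old=%u now=%u\n", old, guest_ram[1]);  /* old=41 now=42 */
        return 0;
    }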
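The helper families generated above come in pairs: fetch_<op> returns the value before the operation and <op>_fetch the value after, which is exactly what the NEW argument of GEN_ATOMIC_HELPER selects (the new_val ? t2 : t1 choice in do_nonatomic_op_i32/_i64). The distinction maps directly onto the compiler builtins, as this small demo shows:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t mem = 10;

        /* fetch_add: returns the value *before* the add (NEW == 0). */
        uint32_t before = __atomic_fetch_add(&mem, 5, __ATOMIC_SEQ_CST);

        /* add_fetch: returns the value *after* the add (NEW == 1). */
        uint32_t after = __atomic_add_fetch(&mem, 5, __ATOMIC_SEQ_CST);

        printf("before=%u after=%u mem=%u\n", before, after, mem);
        /* prints: before=10 after=20 mem=20 */
        return 0;
    }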
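As a usage note, the new benchmark's getopt string above gives -n (threads), -d (duration in seconds) and -r (counter range, rounded up to a power of two by pow2ceil). Since each thread picks an index with r & (range - 1), a range of 1 pins every thread on a single cache line for maximum contention; an illustrative invocation:

    ./tests/atomic_add-bench -n 4 -d 2 -r 1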
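Finally, the test-int128.c conversion above replaces direct .lo/.hi member access with int128_make128()/int128_getlo()/int128_gethi(), so the tests keep building when Int128 becomes a native __int128, as the CONFIG_ATOMIC128 code wants. A simplified sketch of the two representations behind one accessor API, modeled loosely on qemu/int128.h:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #ifdef __SIZEOF_INT128__
    typedef unsigned __int128 Int128;     /* native representation */

    static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
    {
        return ((Int128)hi << 64) | lo;
    }
    static inline uint64_t int128_getlo(Int128 a) { return (uint64_t)a; }
    static inline uint64_t int128_gethi(Int128 a) { return (uint64_t)(a >> 64); }
    #else
    typedef struct { uint64_t lo, hi; } Int128;   /* fallback pair */

    static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
    {
        Int128 r = { lo, hi };
        return r;
    }
    static inline uint64_t int128_getlo(Int128 a) { return a.lo; }
    static inline uint64_t int128_gethi(Int128 a) { return a.hi; }
    #endif

    int main(void)
    {
        Int128 a = int128_make128(0x1111222233334444ULL,
                                  0x5555666677778888ULL);
        /* Same output with either representation. */
        printf("hi=%016" PRIx64 " lo=%016" PRIx64 "\n",
               int128_gethi(a), int128_getlo(a));
        return 0;
    }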