diff options
114 files changed, 5269 insertions, 2082 deletions
diff --git a/Makefile b/Makefile index 1cd5bc80d5..971e92fd79 100644 --- a/Makefile +++ b/Makefile @@ -157,7 +157,8 @@ tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ iohandler.o cutils.o iov.o async.o tools-obj-$(CONFIG_POSIX) += compatfd.o -qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) +qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) $(qapi-obj-y) \ + qapi-visit.o qapi-types.o qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y) qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) diff --git a/arch_init.c b/arch_init.c index 5a1173e29a..f849f9b872 100644 --- a/arch_init.c +++ b/arch_init.c @@ -562,7 +562,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) if ((i & 63) == 0) { uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000; if (t1 > MAX_WAIT) { - DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n", + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", t1, i); break; } @@ -587,7 +587,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; - DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n", + DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(%" PRIu64 ")?\n", expected_time, migrate_max_downtime()); if (expected_time <= migrate_max_downtime()) { @@ -799,8 +799,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } while (!(flags & RAM_SAVE_FLAG_EOS)); done: - DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n", - ret, seq_iter); + DPRINTF("Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); return ret; } @@ -922,11 +922,16 @@ void select_soundhw(const char *optarg) if (is_help_option(optarg)) { show_valid_cards: +#ifdef HAS_AUDIO_CHOICE printf("Valid sound card names (comma separated):\n"); for (c = soundhw; c->name; ++c) { printf ("%-11s %s\n", c->name, c->descr); } printf("\n-soundhw all will enable all of the above\n"); +#else + printf("Machine has no user-selectable audio hardware " + "(it may or may not have always-present audio hardware).\n"); +#endif exit(!is_help_option(optarg)); } else { diff --git a/audio/audio_template.h b/audio/audio_template.h index 519432a7c5..16f7880770 100644 --- a/audio/audio_template.h +++ b/audio/audio_template.h @@ -410,15 +410,15 @@ SW *glue (AUD_open_, TYPE) ( SW *old_sw = NULL; #endif - ldebug ("open %s, freq %d, nchannels %d, fmt %d\n", - name, as->freq, as->nchannels, as->fmt); - if (audio_bug (AUDIO_FUNC, !card || !name || !callback_fn || !as)) { dolog ("card=%p name=%p callback_fn=%p as=%p\n", card, name, callback_fn, as); goto fail; } + ldebug ("open %s, freq %d, nchannels %d, fmt %d\n", + name, as->freq, as->nchannels, as->fmt); + if (audio_bug (AUDIO_FUNC, audio_validate_settings (as))) { audio_print_settings (as); goto fail; diff --git a/block.c b/block.c index 470bdcc1f6..e78039bd5a 100644 --- a/block.c +++ b/block.c @@ -433,7 +433,11 @@ int get_tmp_filename(char *filename, int size) return -EOVERFLOW; } fd = mkstemp(filename); - if (fd < 0 || close(fd)) { + if (fd < 0) { + return -errno; + } + if (close(fd) != 0) { + unlink(filename); return -errno; } return 0; @@ -897,10 +901,10 @@ void bdrv_close(BlockDriverState *bs) bdrv_delete(bs->file); bs->file = NULL; } - - bdrv_dev_change_media_cb(bs, false); } + bdrv_dev_change_media_cb(bs, false); + /*throttling disk I/O limits*/ if (bs->io_limits_enabled) { bdrv_io_limits_disable(bs); diff --git a/block/curl.c b/block/curl.c index e7c3634d35..c1074cd2e3 100644 --- a/block/curl.c +++ b/block/curl.c @@ -542,8 +542,7 @@ static void curl_close(BlockDriverState *bs) } if (s->multi) curl_multi_cleanup(s->multi); - if (s->url) - free(s->url); + g_free(s->url); } static int64_t curl_getlength(BlockDriverState *bs) diff --git a/block/sheepdog.c b/block/sheepdog.c index df4f44107b..e0753ee9e5 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1986,7 +1986,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, vdi_index = pos / SD_DATA_OBJ_SIZE; offset = pos % SD_DATA_OBJ_SIZE; - data_len = MIN(remaining, SD_DATA_OBJ_SIZE); + data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset); vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index); @@ -2007,6 +2007,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, } pos += data_len; + data += data_len; remaining -= data_len; } ret = size; diff --git a/block/vdi.c b/block/vdi.c index c4f1529db9..550cf58a39 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -628,7 +628,6 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options) VdiHeader header; size_t i; size_t bmap_size; - uint32_t *bmap; logout("\n"); @@ -693,21 +692,21 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options) result = -errno; } - bmap = NULL; if (bmap_size > 0) { - bmap = (uint32_t *)g_malloc0(bmap_size); - } - for (i = 0; i < blocks; i++) { - if (image_type == VDI_TYPE_STATIC) { - bmap[i] = i; - } else { - bmap[i] = VDI_UNALLOCATED; + uint32_t *bmap = g_malloc0(bmap_size); + for (i = 0; i < blocks; i++) { + if (image_type == VDI_TYPE_STATIC) { + bmap[i] = i; + } else { + bmap[i] = VDI_UNALLOCATED; + } } + if (write(fd, bmap, bmap_size) < 0) { + result = -errno; + } + g_free(bmap); } - if (write(fd, bmap, bmap_size) < 0) { - result = -errno; - } - g_free(bmap); + if (image_type == VDI_TYPE_STATIC) { if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) { result = -errno; diff --git a/configure b/configure index 7e23309dd4..1b865174e1 100755 --- a/configure +++ b/configure @@ -111,7 +111,6 @@ source_path=`dirname "$0"` cpu="" interp_prefix="/usr/gnemul/qemu-%M" static="no" -sparc_cpu="" cross_prefix="" audio_drv_list="" audio_card_list="ac97 es1370 sb16 hda" @@ -126,7 +125,7 @@ cc_i386=i386-pc-linux-gnu-gcc libs_qga="" debug_info="yes" -target_list="DEFAULT" +target_list="" # Default value for a variable defining feature "foo". # * foo="no" feature will only be used if --enable-foo arg is given @@ -241,21 +240,6 @@ for opt do ;; --disable-debug-info) debug_info="no" ;; - --sparc_cpu=*) - sparc_cpu="$optarg" - case $sparc_cpu in - v7|v8|v8plus|v8plusa) - cpu="sparc" - ;; - v9) - cpu="sparc64" - ;; - *) - echo "undefined SPARC architecture. Exiting"; - exit 1 - ;; - esac - ;; esac done # OS specific @@ -343,8 +327,6 @@ elif check_define __i386__ ; then elif check_define __x86_64__ ; then cpu="x86_64" elif check_define __sparc__ ; then - # We can't check for 64 bit (when gcc is biarch) or V8PLUSA - # They must be specified using --sparc_cpu if check_define __arch64__ ; then cpu="sparc64" else @@ -792,8 +774,6 @@ for opt do ;; --enable-uname-release=*) uname_release="$optarg" ;; - --sparc_cpu=*) - ;; --enable-werror) werror="yes" ;; --disable-werror) werror="no" @@ -881,35 +861,17 @@ for opt do esac done -# -# If cpu ~= sparc and sparc_cpu hasn't been defined, plug in the right -# QEMU_CFLAGS/LDFLAGS (assume sparc_v8plus for 32-bit and sparc_v9 for 64-bit) -# host_guest_base="no" case "$cpu" in - sparc) case $sparc_cpu in - v7|v8) - QEMU_CFLAGS="-mcpu=${sparc_cpu} -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS" - ;; - v8plus|v8plusa) - QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS" - ;; - *) # sparc_cpu not defined in the command line - QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_v8plus__ $QEMU_CFLAGS" - esac + sparc) LDFLAGS="-m32 $LDFLAGS" - QEMU_CFLAGS="-m32 -ffixed-g2 -ffixed-g3 $QEMU_CFLAGS" - if test "$solaris" = "no" ; then - QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS" - fi + QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS" + host_guest_base="yes" ;; sparc64) - QEMU_CFLAGS="-m64 -mcpu=ultrasparc -D__sparc_v9__ $QEMU_CFLAGS" LDFLAGS="-m64 $LDFLAGS" - QEMU_CFLAGS="-ffixed-g5 -ffixed-g6 -ffixed-g7 $QEMU_CFLAGS" - if test "$solaris" != "no" ; then - QEMU_CFLAGS="-ffixed-g1 $QEMU_CFLAGS" - fi + QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS" + host_guest_base="yes" ;; s390) QEMU_CFLAGS="-m31 -march=z990 $QEMU_CFLAGS" @@ -1319,10 +1281,15 @@ if ! "$python" -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_ exit 1 fi -if test "$target_list" = "DEFAULT" ; then - target_list=`echo "$default_target_list" | sed -e 's/,/ /g'` +if test -z "$target_list" ; then + target_list="$default_target_list" +else + target_list=`echo "$target_list" | sed -e 's/,/ /g'` +fi +if test -z "$target_list" ; then + echo "No targets enabled" + exit 1 fi - # see if system emulation was really requested case " $target_list " in *"-softmmu "*) softmmu=yes @@ -1428,10 +1395,10 @@ if test "$seccomp" != "no" ; then LIBS=`$pkg_config --libs libseccomp` seccomp="yes" else - seccomp="no" if test "$seccomp" = "yes"; then feature_not_found "libseccomp" fi + seccomp="no" fi fi ########################################## @@ -2733,6 +2700,9 @@ EOF if $pkg_config --atleast-version=0.12.0 spice-protocol >/dev/null 2>&1; then spice_qxl_io_monitors_config_async="yes" fi + if $pkg_config --atleast-version=0.12.2 spice-protocol > /dev/null 2>&1; then + spice_qxl_client_monitors_config="yes" + fi else if test "$spice" = "yes" ; then feature_not_found "spice" @@ -2787,7 +2757,7 @@ if test "$usb_redir" != "no" ; then usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null) usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null) QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags" - LIBS="$LIBS $usb_redir_libs" + libs_softmmu="$libs_softmmu $usb_redir_libs" else if test "$usb_redir" = "yes"; then feature_not_found "usb-redir" @@ -3480,6 +3450,10 @@ if test "$spice_qxl_io_monitors_config_async" = "yes" ; then echo "CONFIG_QXL_IO_MONITORS_CONFIG_ASYNC=y" >> $config_host_mak fi +if test "$spice_qxl_client_monitors_config" = "yes" ; then + echo "CONFIG_QXL_CLIENT_MONITORS_CONFIG=y" >> $config_host_mak +fi + if test "$smartcard" = "yes" ; then echo "CONFIG_SMARTCARD=y" >> $config_host_mak fi @@ -4119,10 +4093,6 @@ fi if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then case "$ARCH" in - sparc) - # -static is used to avoid g1/g3 usage by the dynamic linker - ldflags="$linker_script -static $ldflags" - ;; alpha | s390x) # The default placement of the application is fine. ;; diff --git a/console.c b/console.c index c1ed5e09e0..a8bcc42fe4 100644 --- a/console.c +++ b/console.c @@ -1612,7 +1612,7 @@ PixelFormat qemu_different_endianness_pixelformat(int bpp) memset(&pf, 0x00, sizeof(PixelFormat)); pf.bits_per_pixel = bpp; - pf.bytes_per_pixel = bpp / 8; + pf.bytes_per_pixel = DIV_ROUND_UP(bpp, 8); pf.depth = bpp == 32 ? 24 : bpp; switch (bpp) { @@ -1661,13 +1661,12 @@ PixelFormat qemu_default_pixelformat(int bpp) memset(&pf, 0x00, sizeof(PixelFormat)); pf.bits_per_pixel = bpp; - pf.bytes_per_pixel = bpp / 8; + pf.bytes_per_pixel = DIV_ROUND_UP(bpp, 8); pf.depth = bpp == 32 ? 24 : bpp; switch (bpp) { case 15: pf.bits_per_pixel = 16; - pf.bytes_per_pixel = 2; pf.rmask = 0x00007c00; pf.gmask = 0x000003E0; pf.bmask = 0x0000001F; diff --git a/disas.c b/disas.c index 7b2acc9943..b801c8f51d 100644 --- a/disas.c +++ b/disas.c @@ -316,9 +316,7 @@ void disas(FILE *out, void *code, unsigned long size) print_insn = print_insn_alpha; #elif defined(__sparc__) print_insn = print_insn_sparc; -#if defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__) disasm_info.mach = bfd_mach_sparc_v9b; -#endif #elif defined(__arm__) print_insn = print_insn_arm; #elif defined(__MIPSEB__) diff --git a/docs/specs/ppc-spapr-hcalls.txt b/docs/specs/ppc-spapr-hcalls.txt index 52ba8d42ab..667b3fa00e 100644 --- a/docs/specs/ppc-spapr-hcalls.txt +++ b/docs/specs/ppc-spapr-hcalls.txt @@ -31,7 +31,7 @@ Arguments: Returns: - H_SUCCESS : Successully called the RTAS function (RTAS result + H_SUCCESS : Successfully called the RTAS function (RTAS result will have been stored in the parameter block) H_PARAMETER : Unknown token diff --git a/docs/usb2.txt b/docs/usb2.txt index d17e3c0044..43dacdec28 100644 --- a/docs/usb2.txt +++ b/docs/usb2.txt @@ -58,11 +58,11 @@ try ... xhci controller support ----------------------- -There also is xhci host controller support available. It got alot +There is also xhci host controller support available. It got a lot less testing than ehci and there are a bunch of known limitations, so ehci may work better for you. On the other hand the xhci hardware design is much more virtualization-friendly, thus xhci emulation uses -less ressources (especially cpu). If you wanna give xhci a try +less resources (especially cpu). If you want to give xhci a try use this to add the host controller ... qemu -device nec-usb-xhci,id=xhci diff --git a/exec-all.h b/exec-all.h index dba96098b8..6516da071d 100644 --- a/exec-all.h +++ b/exec-all.h @@ -132,9 +132,10 @@ static inline void tlb_flush(CPUArchState *env, int flush_global) #define CODE_GEN_AVG_BLOCK_SIZE 64 #endif -#if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__) -#define USE_DIRECT_JUMP -#elif defined(CONFIG_TCG_INTERPRETER) +#if defined(__arm__) || defined(_ARCH_PPC) \ + || defined(__x86_64__) || defined(__i386__) \ + || defined(__sparc__) \ + || defined(CONFIG_TCG_INTERPRETER) #define USE_DIRECT_JUMP #endif @@ -244,6 +245,8 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); #endif } +#elif defined(__sparc__) +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr); #else #error tb_set_jmp_target1 is missing #endif diff --git a/exec.c b/exec.c index 58347664b2..bb6aa4a070 100644 --- a/exec.c +++ b/exec.c @@ -86,7 +86,7 @@ static int nb_tbs; /* any access to the tbs or the page table must use this lock */ spinlock_t tb_lock = SPIN_LOCK_UNLOCKED; -#if defined(__arm__) || defined(__sparc_v9__) +#if defined(__arm__) || defined(__sparc__) /* The prologue must be reachable with a direct jump. ARM and Sparc64 have limited branch ranges (possibly also PPC) so place it in a section close to code segment. */ @@ -541,10 +541,9 @@ static void code_gen_alloc(unsigned long tb_size) /* Cannot map more than that */ if (code_gen_buffer_size > (800 * 1024 * 1024)) code_gen_buffer_size = (800 * 1024 * 1024); -#elif defined(__sparc_v9__) +#elif defined(__sparc__) && HOST_LONG_BITS == 64 // Map the buffer below 2G, so we can use direct calls and branches - flags |= MAP_FIXED; - start = (void *) 0x60000000UL; + start = (void *) 0x40000000UL; if (code_gen_buffer_size > (512 * 1024 * 1024)) code_gen_buffer_size = (512 * 1024 * 1024); #elif defined(__arm__) @@ -582,10 +581,9 @@ static void code_gen_alloc(unsigned long tb_size) /* Cannot map more than that */ if (code_gen_buffer_size > (800 * 1024 * 1024)) code_gen_buffer_size = (800 * 1024 * 1024); -#elif defined(__sparc_v9__) +#elif defined(__sparc__) && HOST_LONG_BITS == 64 // Map the buffer below 2G, so we can use direct calls and branches - flags |= MAP_FIXED; - addr = (void *) 0x60000000UL; + addr = (void *) 0x40000000UL; if (code_gen_buffer_size > (512 * 1024 * 1024)) { code_gen_buffer_size = (512 * 1024 * 1024); } @@ -2525,6 +2523,19 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) } } +static int memory_try_enable_merging(void *addr, size_t len) +{ + QemuOpts *opts; + + opts = qemu_opts_find(qemu_find_opts("machine"), 0); + if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) { + /* disabled by the user */ + return 0; + } + + return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); +} + ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr) { @@ -2544,7 +2555,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, new_block->host = file_ram_alloc(new_block, size, mem_path); if (!new_block->host) { new_block->host = qemu_vmalloc(size); - qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); + memory_try_enable_merging(new_block->host, size); } #else fprintf(stderr, "-mem-path option unsupported\n"); @@ -2559,7 +2570,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, } else { new_block->host = qemu_vmalloc(size); } - qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); + memory_try_enable_merging(new_block->host, size); } } new_block->length = size; @@ -2689,7 +2700,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) length, addr); exit(1); } - qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE); + memory_try_enable_merging(vaddr, length); qemu_ram_setup_dump(vaddr, length); } return; @@ -3523,6 +3534,13 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, /* ROM/RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + l, 0); + /* set dirty bit */ + cpu_physical_memory_set_dirty_flags( + addr1, (0xff & ~CODE_DIRTY_FLAG)); + } qemu_put_ram_ptr(ptr); } len -= l; diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h index 490245004f..a1d489e425 100644 --- a/fpu/softfloat-specialize.h +++ b/fpu/softfloat-specialize.h @@ -41,6 +41,13 @@ these four paragraphs for those parts of this code that are retained. #define SNAN_BIT_IS_ONE 0 #endif +#if defined(TARGET_XTENSA) +/* Define for architectures which deviate from IEEE in not supporting + * signaling NaNs (so all NaNs are treated as quiet). + */ +#define NO_SIGNALING_NANS 1 +#endif + /*---------------------------------------------------------------------------- | The pattern for a default generated half-precision NaN. *----------------------------------------------------------------------------*/ @@ -57,7 +64,8 @@ const float16 float16_default_nan = const_float16(0xFE00); *----------------------------------------------------------------------------*/ #if defined(TARGET_SPARC) const float32 float32_default_nan = const_float32(0x7FFFFFFF); -#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) +#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ + defined(TARGET_XTENSA) const float32 float32_default_nan = const_float32(0x7FC00000); #elif SNAN_BIT_IS_ONE const float32 float32_default_nan = const_float32(0x7FBFFFFF); @@ -127,6 +135,17 @@ typedef struct { uint64_t high, low; } commonNaNT; +#ifdef NO_SIGNALING_NANS +int float16_is_quiet_nan(float16 a_) +{ + return float16_is_any_nan(a_); +} + +int float16_is_signaling_nan(float16 a_) +{ + return 0; +} +#else /*---------------------------------------------------------------------------- | Returns 1 if the half-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -156,6 +175,7 @@ int float16_is_signaling_nan(float16 a_) return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); #endif } +#endif /*---------------------------------------------------------------------------- | Returns a quiet NaN if the half-precision floating point value `a' is a @@ -217,6 +237,17 @@ static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM) } } +#ifdef NO_SIGNALING_NANS +int float32_is_quiet_nan(float32 a_) +{ + return float32_is_any_nan(a_); +} + +int float32_is_signaling_nan(float32 a_) +{ + return 0; +} +#else /*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -246,6 +277,7 @@ int float32_is_signaling_nan( float32 a_ ) return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); #endif } +#endif /*---------------------------------------------------------------------------- | Returns a quiet NaN if the single-precision floating point value `a' is a @@ -372,7 +404,7 @@ static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, return 1; } } -#elif defined(TARGET_PPC) +#elif defined(TARGET_PPC) || defined(TARGET_XTENSA) static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, flag aIsLargerSignificand) { @@ -586,6 +618,17 @@ static float32 propagateFloat32MulAddNaN(float32 a, float32 b, } } +#ifdef NO_SIGNALING_NANS +int float64_is_quiet_nan(float64 a_) +{ + return float64_is_any_nan(a_); +} + +int float64_is_signaling_nan(float64 a_) +{ + return 0; +} +#else /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -619,6 +662,7 @@ int float64_is_signaling_nan( float64 a_ ) && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); #endif } +#endif /*---------------------------------------------------------------------------- | Returns a quiet NaN if the double-precision floating point value `a' is a @@ -773,6 +817,17 @@ static float64 propagateFloat64MulAddNaN(float64 a, float64 b, } } +#ifdef NO_SIGNALING_NANS +int floatx80_is_quiet_nan(floatx80 a_) +{ + return floatx80_is_any_nan(a_); +} + +int floatx80_is_signaling_nan(floatx80 a_) +{ + return 0; +} +#else /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is a | quiet NaN; otherwise returns 0. This slightly differs from the same @@ -816,6 +871,7 @@ int floatx80_is_signaling_nan( floatx80 a ) && ( a.low == aLow ); #endif } +#endif /*---------------------------------------------------------------------------- | Returns a quiet NaN if the extended double-precision floating point value @@ -929,6 +985,17 @@ static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM) } } +#ifdef NO_SIGNALING_NANS +int float128_is_quiet_nan(float128 a_) +{ + return float128_is_any_nan(a_); +} + +int float128_is_signaling_nan(float128 a_) +{ + return 0; +} +#else /*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -964,6 +1031,7 @@ int float128_is_signaling_nan( float128 a ) && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); #endif } +#endif /*---------------------------------------------------------------------------- | Returns a quiet NaN if the quadruple-precision floating point value `a' is diff --git a/fpu/softfloat.h b/fpu/softfloat.h index feec3a180e..d8999b311a 100644 --- a/fpu/softfloat.h +++ b/fpu/softfloat.h @@ -219,7 +219,7 @@ void float_raise( int8 flags STATUS_PARAM); enum { float_muladd_negate_c = 1, float_muladd_negate_product = 2, - float_muladd_negate_result = 3, + float_muladd_negate_result = 4, }; /*---------------------------------------------------------------------------- @@ -251,6 +251,11 @@ int float16_is_quiet_nan( float16 ); int float16_is_signaling_nan( float16 ); float16 float16_maybe_silence_nan( float16 ); +INLINE int float16_is_any_nan(float16 a) +{ + return ((float16_val(a) & ~0x8000) > 0x7c00); +} + /*---------------------------------------------------------------------------- | The pattern for a default generated half-precision NaN. *----------------------------------------------------------------------------*/ diff --git a/gdbstub.c b/gdbstub.c index 5d37dd98f4..d02ec75384 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1226,33 +1226,48 @@ static int cpu_gdb_write_register(CPUOpenRISCState *env, static int cpu_gdb_read_register(CPUSH4State *env, uint8_t *mem_buf, int n) { - if (n < 8) { + switch (n) { + case 0 ... 7: if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) { GET_REGL(env->gregs[n + 16]); } else { GET_REGL(env->gregs[n]); } - } else if (n < 16) { + case 8 ... 15: GET_REGL(env->gregs[n]); - } else if (n >= 25 && n < 41) { - GET_REGL(env->fregs[(n - 25) + ((env->fpscr & FPSCR_FR) ? 16 : 0)]); - } else if (n >= 43 && n < 51) { - GET_REGL(env->gregs[n - 43]); - } else if (n >= 51 && n < 59) { - GET_REGL(env->gregs[n - (51 - 16)]); - } - switch (n) { - case 16: GET_REGL(env->pc); - case 17: GET_REGL(env->pr); - case 18: GET_REGL(env->gbr); - case 19: GET_REGL(env->vbr); - case 20: GET_REGL(env->mach); - case 21: GET_REGL(env->macl); - case 22: GET_REGL(env->sr); - case 23: GET_REGL(env->fpul); - case 24: GET_REGL(env->fpscr); - case 41: GET_REGL(env->ssr); - case 42: GET_REGL(env->spc); + case 16: + GET_REGL(env->pc); + case 17: + GET_REGL(env->pr); + case 18: + GET_REGL(env->gbr); + case 19: + GET_REGL(env->vbr); + case 20: + GET_REGL(env->mach); + case 21: + GET_REGL(env->macl); + case 22: + GET_REGL(env->sr); + case 23: + GET_REGL(env->fpul); + case 24: + GET_REGL(env->fpscr); + case 25 ... 40: + if (env->fpscr & FPSCR_FR) { + stfl_p(mem_buf, env->fregs[n - 9]); + } else { + stfl_p(mem_buf, env->fregs[n - 25]); + } + return 4; + case 41: + GET_REGL(env->ssr); + case 42: + GET_REGL(env->spc); + case 43 ... 50: + GET_REGL(env->gregs[n - 43]); + case 51 ... 58: + GET_REGL(env->gregs[n - (51 - 16)]); } return 0; @@ -1260,42 +1275,63 @@ static int cpu_gdb_read_register(CPUSH4State *env, uint8_t *mem_buf, int n) static int cpu_gdb_write_register(CPUSH4State *env, uint8_t *mem_buf, int n) { - uint32_t tmp; - - tmp = ldl_p(mem_buf); - - if (n < 8) { + switch (n) { + case 0 ... 7: if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) { - env->gregs[n + 16] = tmp; + env->gregs[n + 16] = ldl_p(mem_buf); } else { - env->gregs[n] = tmp; + env->gregs[n] = ldl_p(mem_buf); } - return 4; - } else if (n < 16) { - env->gregs[n] = tmp; - return 4; - } else if (n >= 25 && n < 41) { - env->fregs[(n - 25) + ((env->fpscr & FPSCR_FR) ? 16 : 0)] = tmp; - return 4; - } else if (n >= 43 && n < 51) { - env->gregs[n - 43] = tmp; - return 4; - } else if (n >= 51 && n < 59) { - env->gregs[n - (51 - 16)] = tmp; - return 4; - } - switch (n) { - case 16: env->pc = tmp; break; - case 17: env->pr = tmp; break; - case 18: env->gbr = tmp; break; - case 19: env->vbr = tmp; break; - case 20: env->mach = tmp; break; - case 21: env->macl = tmp; break; - case 22: env->sr = tmp; break; - case 23: env->fpul = tmp; break; - case 24: env->fpscr = tmp; break; - case 41: env->ssr = tmp; break; - case 42: env->spc = tmp; break; + break; + case 8 ... 15: + env->gregs[n] = ldl_p(mem_buf); + break; + case 16: + env->pc = ldl_p(mem_buf); + break; + case 17: + env->pr = ldl_p(mem_buf); + break; + case 18: + env->gbr = ldl_p(mem_buf); + break; + case 19: + env->vbr = ldl_p(mem_buf); + break; + case 20: + env->mach = ldl_p(mem_buf); + break; + case 21: + env->macl = ldl_p(mem_buf); + break; + case 22: + env->sr = ldl_p(mem_buf); + break; + case 23: + env->fpul = ldl_p(mem_buf); + break; + case 24: + env->fpscr = ldl_p(mem_buf); + break; + case 25 ... 40: + if (env->fpscr & FPSCR_FR) { + env->fregs[n - 9] = ldfl_p(mem_buf); + } else { + env->fregs[n - 25] = ldfl_p(mem_buf); + } + break; + case 41: + env->ssr = ldl_p(mem_buf); + break; + case 42: + env->spc = ldl_p(mem_buf); + break; + case 43 ... 50: + env->gregs[n - 43] = ldl_p(mem_buf); + break; + case 51 ... 58: + env->gregs[n - (51 - 16)] = ldl_p(mem_buf); + break; default: return 0; } @@ -1660,6 +1696,10 @@ static int cpu_gdb_read_register(CPUXtensaState *env, uint8_t *mem_buf, int n) GET_REG32(env->uregs[reg->targno & 0xff]); break; + case 4: /*f*/ + GET_REG32(float32_val(env->fregs[reg->targno & 0x0f])); + break; + case 8: /*a*/ GET_REG32(env->regs[reg->targno & 0x0f]); break; @@ -1700,6 +1740,10 @@ static int cpu_gdb_write_register(CPUXtensaState *env, uint8_t *mem_buf, int n) env->uregs[reg->targno & 0xff] = tmp; break; + case 4: /*f*/ + env->fregs[reg->targno & 0x0f] = make_float32(tmp); + break; + case 8: /*a*/ env->regs[reg->targno & 0x0f] = tmp; break; diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index e8dcc6b883..9a0a565723 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -2441,6 +2441,8 @@ static uint32_t cirrus_vga_ioport_read(void *opaque, uint32_t addr) VGACommonState *s = &c->vga; int val, index; + qemu_flush_coalesced_mmio_buffer(); + if (vga_ioport_invalid(s, addr)) { val = 0xff; } else { @@ -2534,6 +2536,8 @@ static void cirrus_vga_ioport_write(void *opaque, uint32_t addr, uint32_t val) VGACommonState *s = &c->vga; int index; + qemu_flush_coalesced_mmio_buffer(); + /* check port range access depending on color/monochrome mode */ if (vga_ioport_invalid(s, addr)) { return; @@ -2854,6 +2858,7 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci, /* I/O handler for LFB */ memory_region_init_io(&s->cirrus_linear_io, &cirrus_linear_io_ops, s, "cirrus-linear-io", VGA_RAM_SIZE); + memory_region_set_flush_coalesced(&s->cirrus_linear_io); /* I/O handler for LFB */ memory_region_init_io(&s->cirrus_linear_bitblt_io, @@ -2861,10 +2866,12 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci, s, "cirrus-bitblt-mmio", 0x400000); + memory_region_set_flush_coalesced(&s->cirrus_linear_bitblt_io); /* I/O handler for memory-mapped I/O */ memory_region_init_io(&s->cirrus_mmio_io, &cirrus_mmio_io_ops, s, "cirrus-mmio", CIRRUS_PNPMMIO_SIZE); + memory_region_set_flush_coalesced(&s->cirrus_mmio_io); s->real_vram_size = (s->device_id == CIRRUS_ID_CLGD5446) ? 4096 * 1024 : 2048 * 1024; diff --git a/hw/e1000.c b/hw/e1000.c index ae8a6c5523..ec3a7c4ecc 100644 --- a/hw/e1000.c +++ b/hw/e1000.c @@ -295,6 +295,7 @@ set_rx_control(E1000State *s, int index, uint32_t val) s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], s->mac_reg[RCTL]); + qemu_flush_queued_packets(&s->nic->nc); } static void @@ -926,6 +927,9 @@ set_rdt(E1000State *s, int index, uint32_t val) { s->check_rxov = 0; s->mac_reg[index] = val & 0xffff; + if (e1000_has_rxbufs(s, 1)) { + qemu_flush_queued_packets(&s->nic->nc); + } } static void diff --git a/hw/eepro100.c b/hw/eepro100.c index 50d117e35e..5b231163d8 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1036,6 +1036,7 @@ static void eepro100_ru_command(EEPRO100State * s, uint8_t val) } set_ru_state(s, ru_ready); s->ru_offset = e100_read_reg4(s, SCBPointer); + qemu_flush_queued_packets(&s->nic->nc); TRACE(OTHER, logout("val=0x%02x (rx start)\n", val)); break; case RX_RESUME: @@ -1770,7 +1771,8 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size) if (rfd_command & COMMAND_EL) { /* EL bit is set, so this was the last frame. */ logout("receive: Running out of frames\n"); - set_ru_state(s, ru_suspended); + set_ru_state(s, ru_no_resources); + eepro100_rnr_interrupt(s); } if (rfd_command & COMMAND_S) { /* S bit is set. */ diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 5ea3cadb01..68671bc4de 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1175,7 +1175,6 @@ void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports) ad->port_no = i; ad->port.dma = &ad->dma; ad->port.dma->ops = &ahci_dma_ops; - ad->port_regs.cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON; } } @@ -1199,6 +1198,7 @@ void ahci_reset(AHCIState *s) pr->irq_stat = 0; pr->irq_mask = 0; pr->scr_ctl = 0; + pr->cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON; ahci_reset_port(s, i); } } diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index f7f714c726..685cbaa889 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -875,6 +875,12 @@ static void cmd_start_stop_unit(IDEState *s, uint8_t* buf) int sense; bool start = buf[4] & 1; bool loej = buf[4] & 2; /* load on start, eject on !start */ + int pwrcnd = buf[4] & 0xf0; + + if (pwrcnd) { + /* eject/load only happens for power condition == 0 */ + return; + } if (loej) { if (!start && !s->tray_open && s->tray_locked) { diff --git a/hw/ide/core.c b/hw/ide/core.c index d65ef3d58d..d6fb69c634 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -53,8 +53,6 @@ static const int smart_attributes[][12] = { { 0x0c, 0x03, 0x00, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, /* airflow-temperature-celsius */ { 190, 0x03, 0x00, 0x45, 0x45, 0x1f, 0x00, 0x1f, 0x1f, 0x00, 0x00, 0x32}, - /* end of list */ - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }; static int ide_handle_rw_error(IDEState *s, int error, int op); @@ -1468,9 +1466,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) case SMART_READ_THRESH: memset(s->io_buffer, 0, 0x200); s->io_buffer[0] = 0x01; /* smart struct version */ - for (n=0; n<30; n++) { - if (smart_attributes[n][0] == 0) - break; + for (n = 0; n < ARRAY_SIZE(smart_attributes); n++) { s->io_buffer[2+0+(n*12)] = smart_attributes[n][0]; s->io_buffer[2+1+(n*12)] = smart_attributes[n][11]; } @@ -1484,10 +1480,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val) case SMART_READ_DATA: memset(s->io_buffer, 0, 0x200); s->io_buffer[0] = 0x01; /* smart struct version */ - for (n=0; n<30; n++) { - if (smart_attributes[n][0] == 0) { - break; - } + for (n = 0; n < ARRAY_SIZE(smart_attributes); n++) { int i; for(i = 0; i < 11; i++) { s->io_buffer[2+i+(n*12)] = smart_attributes[n][i]; diff --git a/hw/imx_avic.c b/hw/imx_avic.c index 4f010e8ee2..b1a8fe6d4c 100644 --- a/hw/imx_avic.c +++ b/hw/imx_avic.c @@ -6,9 +6,9 @@ * * Copyright (c) 2008 OKL * Copyright (c) 2011 NICTA Pty Ltd - * Originally Written by Hans Jiang + * Originally written by Hans Jiang * - * This code is licenced under the GPL version 2 or later. See + * This code is licensed under the GPL version 2 or later. See * the COPYING file in the top-level directory. * * TODO: implement vectors. diff --git a/hw/imx_timer.c b/hw/imx_timer.c index 16215ccf04..c28c53725a 100644 --- a/hw/imx_timer.c +++ b/hw/imx_timer.c @@ -3,10 +3,10 @@ * * Copyright (c) 2008 OK Labs * Copyright (c) 2011 NICTA Pty Ltd - * Originally Written by Hans Jiang + * Originally written by Hans Jiang * Updated by Peter Chubb * - * This code is licenced under GPL version 2 or later. See + * This code is licensed under GPL version 2 or later. See * the COPYING file in the top-level directory. * */ diff --git a/hw/kzm.c b/hw/kzm.c index 6a5e9dfaca..68cd1b48b9 100644 --- a/hw/kzm.c +++ b/hw/kzm.c @@ -5,7 +5,7 @@ * Written by Hans at OK-Labs * Updated by Peter Chubb. * - * This code is licenced under the GPL, version 2 or later. + * This code is licensed under the GPL, version 2 or later. * See the file `COPYING' in the top level directory. * * It (partially) emulates a Kyoto Microcomputer diff --git a/hw/pc.c b/hw/pc.c index 112739a278..7e7e0e2235 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -53,9 +53,6 @@ #include "bitmap.h" #include "vga-pci.h" -/* output Bochs bios info messages */ -//#define DEBUG_BIOS - /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -534,17 +531,6 @@ static void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val) static int shutdown_index = 0; switch(addr) { - /* Bochs BIOS messages */ - case 0x400: - case 0x401: - /* used to be panic, now unused */ - break; - case 0x402: - case 0x403: -#ifdef DEBUG_BIOS - fprintf(stderr, "%c", val); -#endif - break; case 0x8900: /* same as Bochs power off */ if (val == shutdown_str[shutdown_index]) { @@ -558,16 +544,9 @@ static void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val) } break; - /* LGPL'ed VGA BIOS messages */ case 0x501: case 0x502: exit((val << 1) | 1); - case 0x500: - case 0x503: -#ifdef DEBUG_BIOS - fprintf(stderr, "%c", val); -#endif - break; } } @@ -596,17 +575,11 @@ static void *bochs_bios_init(void) uint64_t *numa_fw_cfg; int i, j; - register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL); - register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL); - register_ioport_write(0x402, 1, 1, bochs_bios_write, NULL); - register_ioport_write(0x403, 1, 1, bochs_bios_write, NULL); register_ioport_write(0x8900, 1, 1, bochs_bios_write, NULL); register_ioport_write(0x501, 1, 1, bochs_bios_write, NULL); register_ioport_write(0x501, 1, 2, bochs_bios_write, NULL); register_ioport_write(0x502, 1, 2, bochs_bios_write, NULL); - register_ioport_write(0x500, 1, 1, bochs_bios_write, NULL); - register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL); fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c index d1c742379b..9c42d3105c 100644 --- a/hw/pflash_cfi01.c +++ b/hw/pflash_cfi01.c @@ -41,6 +41,7 @@ #include "block.h" #include "qemu-timer.h" #include "exec-memory.h" +#include "host-utils.h" #define PFLASH_BUG(fmt, ...) \ do { \ @@ -543,42 +544,6 @@ static const MemoryRegionOps pflash_cfi01_ops_le = { .endianness = DEVICE_NATIVE_ENDIAN, }; -/* Count trailing zeroes of a 32 bits quantity */ -static int ctz32 (uint32_t n) -{ - int ret; - - ret = 0; - if (!(n & 0xFFFF)) { - ret += 16; - n = n >> 16; - } - if (!(n & 0xFF)) { - ret += 8; - n = n >> 8; - } - if (!(n & 0xF)) { - ret += 4; - n = n >> 4; - } - if (!(n & 0x3)) { - ret += 2; - n = n >> 2; - } - if (!(n & 0x1)) { - ret++; -#if 0 /* This is not necessary as n is never 0 */ - n = n >> 1; -#endif - } -#if 0 /* This is not necessary as n is never 0 */ - if (!n) - ret++; -#endif - - return ret; -} - pflash_t *pflash_cfi01_register(target_phys_addr_t base, DeviceState *qdev, const char *name, target_phys_addr_t size, @@ -711,7 +676,7 @@ pflash_t *pflash_cfi01_register(target_phys_addr_t base, pfl->cfi_table[0x33] = 'I'; pfl->cfi_table[0x34] = '1'; - pfl->cfi_table[0x35] = '1'; + pfl->cfi_table[0x35] = '0'; pfl->cfi_table[0x36] = 0x00; pfl->cfi_table[0x37] = 0x00; @@ -723,6 +688,8 @@ pflash_t *pflash_cfi01_register(target_phys_addr_t base, pfl->cfi_table[0x3b] = 0x00; pfl->cfi_table[0x3c] = 0x00; + pfl->cfi_table[0x3f] = 0x01; /* Number of protection fields */ + return pfl; } diff --git a/hw/pflash_cfi02.c b/hw/pflash_cfi02.c index 3e2002e4b3..8cb1549587 100644 --- a/hw/pflash_cfi02.c +++ b/hw/pflash_cfi02.c @@ -40,6 +40,7 @@ #include "qemu-timer.h" #include "block.h" #include "exec-memory.h" +#include "host-utils.h" //#define PFLASH_DEBUG #ifdef PFLASH_DEBUG @@ -575,42 +576,6 @@ static const MemoryRegionOps pflash_cfi02_ops_le = { .endianness = DEVICE_NATIVE_ENDIAN, }; -/* Count trailing zeroes of a 32 bits quantity */ -static int ctz32 (uint32_t n) -{ - int ret; - - ret = 0; - if (!(n & 0xFFFF)) { - ret += 16; - n = n >> 16; - } - if (!(n & 0xFF)) { - ret += 8; - n = n >> 8; - } - if (!(n & 0xF)) { - ret += 4; - n = n >> 4; - } - if (!(n & 0x3)) { - ret += 2; - n = n >> 2; - } - if (!(n & 0x1)) { - ret++; -#if 0 /* This is not necessary as n is never 0 */ - n = n >> 1; -#endif - } -#if 0 /* This is not necessary as n is never 0 */ - if (!n) - ret++; -#endif - - return ret; -} - pflash_t *pflash_cfi02_register(target_phys_addr_t base, DeviceState *qdev, const char *name, target_phys_addr_t size, diff --git a/hw/qxl.c b/hw/qxl.c index 5b3f484266..33169f348a 100644 --- a/hw/qxl.c +++ b/hw/qxl.c @@ -18,6 +18,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ +#include <zlib.h> + #include "qemu-common.h" #include "qemu-timer.h" #include "qemu-queue.h" @@ -141,6 +143,7 @@ static void qxl_ring_set_dirty(PCIQXLDevice *qxl); void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...) { + trace_qxl_set_guest_bug(qxl->id); qxl_send_events(qxl, QXL_INTERRUPT_ERROR); qxl->guest_bug = 1; if (qxl->guestdebug) { @@ -201,6 +204,7 @@ static void qxl_spice_destroy_surface_wait(PCIQXLDevice *qxl, uint32_t id, spice_qxl_destroy_surface_async(&qxl->ssd.qxl, id, (uintptr_t)cookie); } else { qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id); + qxl_spice_destroy_surface_wait_complete(qxl, id); } } @@ -597,9 +601,9 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext) case QXL_MODE_VGA: ret = false; qemu_mutex_lock(&qxl->ssd.lock); - if (qxl->ssd.update != NULL) { - update = qxl->ssd.update; - qxl->ssd.update = NULL; + update = QTAILQ_FIRST(&qxl->ssd.updates); + if (update != NULL) { + QTAILQ_REMOVE(&qxl->ssd.updates, update, next); *ext = update->ext; ret = true; } @@ -953,6 +957,11 @@ static void interface_set_client_capabilities(QXLInstance *sin, { PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl); + if (runstate_check(RUN_STATE_INMIGRATE) || + runstate_check(RUN_STATE_POSTMIGRATE)) { + return; + } + qxl->shadow_rom.client_present = client_present; memcpy(qxl->shadow_rom.client_capabilities, caps, sizeof(caps)); qxl->rom->client_present = client_present; @@ -964,6 +973,79 @@ static void interface_set_client_capabilities(QXLInstance *sin, #endif +#if defined(CONFIG_QXL_CLIENT_MONITORS_CONFIG) \ + && SPICE_SERVER_VERSION >= 0x000b05 + +static uint32_t qxl_crc32(const uint8_t *p, unsigned len) +{ + /* + * zlib xors the seed with 0xffffffff, and xors the result + * again with 0xffffffff; Both are not done with linux's crc32, + * which we want to be compatible with, so undo that. + */ + return crc32(0xffffffff, p, len) ^ 0xffffffff; +} + +/* called from main context only */ +static int interface_client_monitors_config(QXLInstance *sin, + VDAgentMonitorsConfig *monitors_config) +{ + PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl); + QXLRom *rom = memory_region_get_ram_ptr(&qxl->rom_bar); + int i; + + /* + * Older windows drivers set int_mask to 0 when their ISR is called, + * then later set it to ~0. So it doesn't relate to the actual interrupts + * handled. However, they are old, so clearly they don't support this + * interrupt + */ + if (qxl->ram->int_mask == 0 || qxl->ram->int_mask == ~0 || + !(qxl->ram->int_mask & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG)) { + trace_qxl_client_monitors_config_unsupported_by_guest(qxl->id, + qxl->ram->int_mask, + monitors_config); + return 0; + } + if (!monitors_config) { + return 1; + } + memset(&rom->client_monitors_config, 0, + sizeof(rom->client_monitors_config)); + rom->client_monitors_config.count = monitors_config->num_of_monitors; + /* monitors_config->flags ignored */ + if (rom->client_monitors_config.count >= + ARRAY_SIZE(rom->client_monitors_config.heads)) { + trace_qxl_client_monitors_config_capped(qxl->id, + monitors_config->num_of_monitors, + ARRAY_SIZE(rom->client_monitors_config.heads)); + rom->client_monitors_config.count = + ARRAY_SIZE(rom->client_monitors_config.heads); + } + for (i = 0 ; i < rom->client_monitors_config.count ; ++i) { + VDAgentMonConfig *monitor = &monitors_config->monitors[i]; + QXLURect *rect = &rom->client_monitors_config.heads[i]; + /* monitor->depth ignored */ + rect->left = monitor->x; + rect->top = monitor->y; + rect->right = monitor->x + monitor->width; + rect->bottom = monitor->y + monitor->height; + } + rom->client_monitors_config_crc = qxl_crc32( + (const uint8_t *)&rom->client_monitors_config, + sizeof(rom->client_monitors_config)); + trace_qxl_client_monitors_config_crc(qxl->id, + sizeof(rom->client_monitors_config), + rom->client_monitors_config_crc); + + trace_qxl_interrupt_client_monitors_config(qxl->id, + rom->client_monitors_config.count, + rom->client_monitors_config.heads); + qxl_send_events(qxl, QXL_INTERRUPT_CLIENT_MONITORS_CONFIG); + return 1; +} +#endif + static const QXLInterface qxl_interface = { .base.type = SPICE_INTERFACE_QXL, .base.description = "qxl gpu", @@ -988,6 +1070,10 @@ static const QXLInterface qxl_interface = { #if SPICE_SERVER_VERSION >= 0x000b04 .set_client_capabilities = interface_set_client_capabilities, #endif +#if SPICE_SERVER_VERSION >= 0x000b05 && \ + defined(CONFIG_QXL_CLIENT_MONITORS_CONFIG) + .client_monitors_config = interface_client_monitors_config, +#endif }; static void qxl_enter_vga_mode(PCIQXLDevice *d) @@ -1402,7 +1488,7 @@ static void ioport_write(void *opaque, target_phys_addr_t addr, break; } trace_qxl_io_unexpected_vga_mode(d->id, - io_port, io_port_to_string(io_port)); + addr, val, io_port_to_string(io_port)); /* be nice to buggy guest drivers */ if (io_port >= QXL_IO_UPDATE_AREA_ASYNC && io_port < QXL_IO_RANGE_SIZE) { @@ -1470,6 +1556,13 @@ async_common: return; } + if (update.left < 0 || update.top < 0 || update.left >= update.right || + update.top >= update.bottom) { + qxl_set_guest_bug(d, "QXL_IO_UPDATE_AREA: " + "invalid area(%d,%d,%d,%d)\n", update.left, + update.right, update.top, update.bottom); + break; + } if (async == QXL_ASYNC) { cookie = qxl_cookie_new(QXL_COOKIE_TYPE_IO, QXL_IO_UPDATE_AREA_ASYNC); @@ -1501,6 +1594,7 @@ async_common: qxl_set_mode(d, val, 0); break; case QXL_IO_LOG: + trace_qxl_io_log(d->id, d->ram->log_buf); if (d->guestdebug) { fprintf(stderr, "qxl/guest-%d: %" PRId64 ": %s", d->id, qemu_get_clock_ns(vm_clock), d->ram->log_buf); @@ -1594,9 +1688,9 @@ cancel_async: static uint64_t ioport_read(void *opaque, target_phys_addr_t addr, unsigned size) { - PCIQXLDevice *d = opaque; + PCIQXLDevice *qxl = opaque; - trace_qxl_io_read_unexpected(d->id); + trace_qxl_io_read_unexpected(qxl->id); return 0xff; } @@ -1626,6 +1720,7 @@ static void qxl_send_events(PCIQXLDevice *d, uint32_t events) uint32_t old_pending; uint32_t le_events = cpu_to_le32(events); + trace_qxl_send_events(d->id, events); assert(qemu_spice_display_is_running(&d->ssd)); old_pending = __sync_fetch_and_or(&d->ram->int_pending, le_events); if ((old_pending & le_events) == le_events) { @@ -1910,6 +2005,7 @@ static int qxl_init_common(PCIQXLDevice *qxl) if (qxl->id == 0) { vga_dirty_log_start(&qxl->vga); } + memory_region_set_flush_coalesced(&qxl->io_bar); pci_register_bar(&qxl->pci, QXL_IO_RANGE_INDEX, diff --git a/hw/srp.h b/hw/srp.h index 3009bd56ce..5e0cad5c19 100644 --- a/hw/srp.h +++ b/hw/srp.h @@ -177,13 +177,13 @@ struct srp_tsk_mgmt { uint8_t reserved1[6]; uint64_t tag; uint8_t reserved2[4]; - uint64_t lun QEMU_PACKED; + uint64_t lun; uint8_t reserved3[2]; uint8_t tsk_mgmt_func; uint8_t reserved4; uint64_t task_tag; uint8_t reserved5[8]; -}; +} QEMU_PACKED; /* * We need the packed attribute because the SRP spec only aligns the @@ -198,14 +198,14 @@ struct srp_cmd { uint8_t data_in_desc_cnt; uint64_t tag; uint8_t reserved2[4]; - uint64_t lun QEMU_PACKED; + uint64_t lun; uint8_t reserved3; uint8_t task_attr; uint8_t reserved4; uint8_t add_cdb_len; uint8_t cdb[16]; uint8_t add_data[0]; -}; +} QEMU_PACKED; enum { SRP_RSP_FLAG_RSPVALID = 1 << 0, diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c index c84892c98d..e4a43599b5 100644 --- a/hw/usb/dev-network.c +++ b/hw/usb/dev-network.c @@ -1001,6 +1001,13 @@ static int rndis_keepalive_response(USBNetState *s, return 0; } +/* Prepare to receive the next packet */ +static void usb_net_reset_in_buf(USBNetState *s) +{ + s->in_ptr = s->in_len = 0; + qemu_flush_queued_packets(&s->nic->nc); +} + static int rndis_parse(USBNetState *s, uint8_t *data, int length) { uint32_t msg_type; @@ -1025,7 +1032,8 @@ static int rndis_parse(USBNetState *s, uint8_t *data, int length) case RNDIS_RESET_MSG: rndis_clear_responsequeue(s); - s->out_ptr = s->in_ptr = s->in_len = 0; + s->out_ptr = 0; + usb_net_reset_in_buf(s); return rndis_reset_response(s, (rndis_reset_msg_type *) data); case RNDIS_KEEPALIVE_MSG: @@ -1135,7 +1143,7 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p) int ret = USB_RET_NAK; if (s->in_ptr > s->in_len) { - s->in_ptr = s->in_len = 0; + usb_net_reset_in_buf(s); ret = USB_RET_NAK; return ret; } @@ -1152,7 +1160,7 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p) if (s->in_ptr >= s->in_len && (is_rndis(s) || (s->in_len & (64 - 1)) || !ret)) { /* no short packet necessary */ - s->in_ptr = s->in_len = 0; + usb_net_reset_in_buf(s); } #ifdef TRAFFIC_DEBUG @@ -1250,20 +1258,32 @@ static int usb_net_handle_data(USBDevice *dev, USBPacket *p) static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t size) { USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque; - struct rndis_packet_msg_type *msg; + uint8_t *in_buf = s->in_buf; + size_t total_size = size; if (is_rndis(s)) { - msg = (struct rndis_packet_msg_type *) s->in_buf; if (s->rndis_state != RNDIS_DATA_INITIALIZED) { return -1; } - if (size + sizeof(struct rndis_packet_msg_type) > sizeof(s->in_buf)) - return -1; + total_size += sizeof(struct rndis_packet_msg_type); + } + if (total_size > sizeof(s->in_buf)) { + return -1; + } + /* Only accept packet if input buffer is empty */ + if (s->in_len > 0) { + return 0; + } + + if (is_rndis(s)) { + struct rndis_packet_msg_type *msg; + + msg = (struct rndis_packet_msg_type *)in_buf; memset(msg, 0, sizeof(struct rndis_packet_msg_type)); msg->MessageType = cpu_to_le32(RNDIS_PACKET_MSG); - msg->MessageLength = cpu_to_le32(size + sizeof(struct rndis_packet_msg_type)); - msg->DataOffset = cpu_to_le32(sizeof(struct rndis_packet_msg_type) - 8); + msg->MessageLength = cpu_to_le32(size + sizeof(*msg)); + msg->DataOffset = cpu_to_le32(sizeof(*msg) - 8); msg->DataLength = cpu_to_le32(size); /* msg->OOBDataOffset; * msg->OOBDataLength; @@ -1273,14 +1293,11 @@ static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t siz * msg->VcHandle; * msg->Reserved; */ - memcpy(msg + 1, buf, size); - s->in_len = size + sizeof(struct rndis_packet_msg_type); - } else { - if (size > sizeof(s->in_buf)) - return -1; - memcpy(s->in_buf, buf, size); - s->in_len = size; + in_buf += sizeof(*msg); } + + memcpy(in_buf, buf, size); + s->in_len = total_size; s->in_ptr = 0; return size; } diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 2f3e9c03a1..6a5da8413f 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -34,6 +34,7 @@ #include "monitor.h" #include "trace.h" #include "dma.h" +#include "sysemu.h" #define EHCI_DEBUG 0 @@ -139,6 +140,7 @@ #define NB_PORTS 6 // Number of downstream ports #define BUFF_SIZE 5*4096 // Max bytes to transfer per transaction #define MAX_QH 100 // Max allowable queue heads in a chain +#define MIN_FR_PER_TICK 3 // Min frames to process when catching up /* Internal periodic / asynchronous schedule state machine states */ @@ -389,6 +391,9 @@ struct EHCIState { USBBus bus; qemu_irq irq; MemoryRegion mem; + MemoryRegion mem_caps; + MemoryRegion mem_opreg; + MemoryRegion mem_ports; int companion_count; /* properties */ @@ -398,10 +403,10 @@ struct EHCIState { * EHCI spec version 1.0 Section 2.3 * Host Controller Operational Registers */ + uint8_t caps[OPREGBASE]; union { - uint8_t mmio[MMIO_SIZE]; + uint32_t opreg[(PORTSC_BEGIN-OPREGBASE)/sizeof(uint32_t)]; struct { - uint8_t cap[OPREGBASE]; uint32_t usbcmd; uint32_t usbsts; uint32_t usbintr; @@ -411,9 +416,9 @@ struct EHCIState { uint32_t asynclistaddr; uint32_t notused[9]; uint32_t configflag; - uint32_t portsc[NB_PORTS]; }; }; + uint32_t portsc[NB_PORTS]; /* * Internal states, shadow registers, etc @@ -471,22 +476,12 @@ static const char *ehci_state_names[] = { }; static const char *ehci_mmio_names[] = { - [CAPLENGTH] = "CAPLENGTH", - [HCIVERSION] = "HCIVERSION", - [HCSPARAMS] = "HCSPARAMS", - [HCCPARAMS] = "HCCPARAMS", [USBCMD] = "USBCMD", [USBSTS] = "USBSTS", [USBINTR] = "USBINTR", [FRINDEX] = "FRINDEX", [PERIODICLISTBASE] = "P-LIST BASE", [ASYNCLISTADDR] = "A-LIST ADDR", - [PORTSC_BEGIN] = "PORTSC #0", - [PORTSC_BEGIN + 4] = "PORTSC #1", - [PORTSC_BEGIN + 8] = "PORTSC #2", - [PORTSC_BEGIN + 12] = "PORTSC #3", - [PORTSC_BEGIN + 16] = "PORTSC #4", - [PORTSC_BEGIN + 20] = "PORTSC #5", [CONFIGFLAG] = "CONFIGFLAG", }; @@ -509,7 +504,8 @@ static const char *state2str(uint32_t state) static const char *addr2str(target_phys_addr_t addr) { - return nr2str(ehci_mmio_names, ARRAY_SIZE(ehci_mmio_names), addr); + return nr2str(ehci_mmio_names, ARRAY_SIZE(ehci_mmio_names), + addr + OPREGBASE); } static void ehci_trace_usbsts(uint32_t mask, int state) @@ -853,10 +849,10 @@ static EHCIQueue *ehci_find_queue_by_qh(EHCIState *ehci, uint32_t addr, return NULL; } -static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush) +static void ehci_queues_rip_unused(EHCIState *ehci, int async) { EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues; - const char *warn = (async && !flush) ? "guest unlinked busy QH" : NULL; + const char *warn = async ? "guest unlinked busy QH" : NULL; uint64_t maxage = FRAME_TIMER_NS * ehci->maxframes * 4; EHCIQueue *q, *tmp; @@ -866,13 +862,25 @@ static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush) q->ts = ehci->last_run_ns; continue; } - if (!flush && ehci->last_run_ns < q->ts + maxage) { + if (ehci->last_run_ns < q->ts + maxage) { continue; } ehci_free_queue(q, warn); } } +static void ehci_queues_rip_unseen(EHCIState *ehci, int async) +{ + EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues; + EHCIQueue *q, *tmp; + + QTAILQ_FOREACH_SAFE(q, head, next, tmp) { + if (!q->seen) { + ehci_free_queue(q, NULL); + } + } +} + static void ehci_queues_rip_device(EHCIState *ehci, USBDevice *dev, int async) { EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues; @@ -1018,7 +1026,7 @@ static int ehci_register_companion(USBBus *bus, USBPort *ports[], } s->companion_count++; - s->mmio[0x05] = (s->companion_count << 4) | portcount; + s->caps[0x05] = (s->companion_count << 4) | portcount; return 0; } @@ -1063,7 +1071,8 @@ static void ehci_reset(void *opaque) } } - memset(&s->mmio[OPREGBASE], 0x00, MMIO_SIZE - OPREGBASE); + memset(&s->opreg, 0x00, sizeof(s->opreg)); + memset(&s->portsc, 0x00, sizeof(s->portsc)); s->usbcmd = NB_MAXINTRATE << USBCMD_ITC_SH; s->usbsts = USBSTS_HALT; @@ -1090,50 +1099,35 @@ static void ehci_reset(void *opaque) qemu_bh_cancel(s->async_bh); } -static uint32_t ehci_mem_readb(void *ptr, target_phys_addr_t addr) +static uint64_t ehci_caps_read(void *ptr, target_phys_addr_t addr, + unsigned size) { EHCIState *s = ptr; - uint32_t val; - - val = s->mmio[addr]; - - return val; + return s->caps[addr]; } -static uint32_t ehci_mem_readw(void *ptr, target_phys_addr_t addr) +static uint64_t ehci_opreg_read(void *ptr, target_phys_addr_t addr, + unsigned size) { EHCIState *s = ptr; uint32_t val; - val = s->mmio[addr] | (s->mmio[addr+1] << 8); - + val = s->opreg[addr >> 2]; + trace_usb_ehci_opreg_read(addr + OPREGBASE, addr2str(addr), val); return val; } -static uint32_t ehci_mem_readl(void *ptr, target_phys_addr_t addr) +static uint64_t ehci_port_read(void *ptr, target_phys_addr_t addr, + unsigned size) { EHCIState *s = ptr; uint32_t val; - val = s->mmio[addr] | (s->mmio[addr+1] << 8) | - (s->mmio[addr+2] << 16) | (s->mmio[addr+3] << 24); - - trace_usb_ehci_mmio_readl(addr, addr2str(addr), val); + val = s->portsc[addr >> 2]; + trace_usb_ehci_portsc_read(addr + PORTSC_BEGIN, addr >> 2, val); return val; } -static void ehci_mem_writeb(void *ptr, target_phys_addr_t addr, uint32_t val) -{ - fprintf(stderr, "EHCI doesn't handle byte writes to MMIO\n"); - exit(1); -} - -static void ehci_mem_writew(void *ptr, target_phys_addr_t addr, uint32_t val) -{ - fprintf(stderr, "EHCI doesn't handle 16-bit writes to MMIO\n"); - exit(1); -} - static void handle_port_owner_write(EHCIState *s, int port, uint32_t owner) { USBDevice *dev = s->ports[port].dev; @@ -1162,11 +1156,17 @@ static void handle_port_owner_write(EHCIState *s, int port, uint32_t owner) } } -static void handle_port_status_write(EHCIState *s, int port, uint32_t val) +static void ehci_port_write(void *ptr, target_phys_addr_t addr, + uint64_t val, unsigned size) { + EHCIState *s = ptr; + int port = addr >> 2; uint32_t *portsc = &s->portsc[port]; + uint32_t old = *portsc; USBDevice *dev = s->ports[port].dev; + trace_usb_ehci_portsc_write(addr + PORTSC_BEGIN, addr >> 2, val); + /* Clear rwc bits */ *portsc &= ~(val & PORTSC_RWC_MASK); /* The guest may clear, but not set the PED bit */ @@ -1198,39 +1198,20 @@ static void handle_port_status_write(EHCIState *s, int port, uint32_t val) *portsc &= ~PORTSC_RO_MASK; *portsc |= val; + trace_usb_ehci_portsc_change(addr + PORTSC_BEGIN, addr >> 2, *portsc, old); } -static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val) +static void ehci_opreg_write(void *ptr, target_phys_addr_t addr, + uint64_t val, unsigned size) { EHCIState *s = ptr; - uint32_t *mmio = (uint32_t *)(&s->mmio[addr]); + uint32_t *mmio = s->opreg + (addr >> 2); uint32_t old = *mmio; int i; - trace_usb_ehci_mmio_writel(addr, addr2str(addr), val); - - /* Only aligned reads are allowed on OHCI */ - if (addr & 3) { - fprintf(stderr, "usb-ehci: Mis-aligned write to addr 0x" - TARGET_FMT_plx "\n", addr); - return; - } - - if (addr >= PORTSC && addr < PORTSC + 4 * NB_PORTS) { - handle_port_status_write(s, (addr-PORTSC)/4, val); - trace_usb_ehci_mmio_change(addr, addr2str(addr), *mmio, old); - return; - } - - if (addr < OPREGBASE) { - fprintf(stderr, "usb-ehci: write attempt to read-only register" - TARGET_FMT_plx "\n", addr); - return; - } + trace_usb_ehci_opreg_write(addr + OPREGBASE, addr2str(addr), val); - - /* Do any register specific pre-write processing here. */ - switch(addr) { + switch (addr + OPREGBASE) { case USBCMD: if (val & USBCMD_HCRESET) { ehci_reset(s); @@ -1241,7 +1222,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val) /* not supporting dynamic frame list size at the moment */ if ((val & USBCMD_FLS) && !(s->usbcmd & USBCMD_FLS)) { fprintf(stderr, "attempt to set frame list size -- value %d\n", - val & USBCMD_FLS); + (int)val & USBCMD_FLS); val &= ~USBCMD_FLS; } @@ -1308,7 +1289,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val) } *mmio = val; - trace_usb_ehci_mmio_change(addr, addr2str(addr), *mmio, old); + trace_usb_ehci_opreg_change(addr + OPREGBASE, addr2str(addr), *mmio, old); } @@ -1732,7 +1713,7 @@ static int ehci_state_waitlisthead(EHCIState *ehci, int async) ehci_set_usbsts(ehci, USBSTS_REC); } - ehci_queues_rip_unused(ehci, async, 0); + ehci_queues_rip_unused(ehci, async); /* Find the head of the list (4.9.1.1) */ for(i = 0; i < MAX_QH; i++) { @@ -2364,7 +2345,7 @@ static void ehci_advance_async_state(EHCIState *ehci) */ if (ehci->usbcmd & USBCMD_IAAD) { /* Remove all unseen qhs from the async qhs queue */ - ehci_queues_rip_unused(ehci, async, 1); + ehci_queues_rip_unseen(ehci, async); trace_usb_ehci_doorbell_ack(); ehci->usbcmd &= ~USBCMD_IAAD; ehci_raise_irq(ehci, USBSTS_IAA); @@ -2417,7 +2398,7 @@ static void ehci_advance_periodic_state(EHCIState *ehci) ehci_set_fetch_addr(ehci, async,entry); ehci_set_state(ehci, async, EST_FETCHENTRY); ehci_advance_state(ehci, async); - ehci_queues_rip_unused(ehci, async, 0); + ehci_queues_rip_unused(ehci, async); break; default: @@ -2446,7 +2427,7 @@ static void ehci_update_frindex(EHCIState *ehci, int frames) if (ehci->frindex == 0x00004000) { ehci_raise_irq(ehci, USBSTS_FLR); ehci->frindex = 0; - if (ehci->usbsts_frindex > 0x00004000) { + if (ehci->usbsts_frindex >= 0x00004000) { ehci->usbsts_frindex -= 0x00004000; } else { ehci->usbsts_frindex = 0; @@ -2481,6 +2462,19 @@ static void ehci_frame_timer(void *opaque) } for (i = 0; i < frames; i++) { + /* + * If we're running behind schedule, we should not catch up + * too fast, as that will make some guests unhappy: + * 1) We must process a minimum of MIN_FR_PER_TICK frames, + * otherwise we will never catch up + * 2) Process frames until the guest has requested an irq (IOC) + */ + if (i >= MIN_FR_PER_TICK) { + ehci_commit_irq(ehci); + if ((ehci->usbsts & USBINTR_MASK) & ehci->usbintr) { + break; + } + } ehci_update_frindex(ehci, 1); ehci_advance_periodic_state(ehci); ehci->last_run_ns += FRAME_TIMER_NS; @@ -2520,11 +2514,28 @@ static void ehci_async_bh(void *opaque) ehci_advance_async_state(ehci); } -static const MemoryRegionOps ehci_mem_ops = { - .old_mmio = { - .read = { ehci_mem_readb, ehci_mem_readw, ehci_mem_readl }, - .write = { ehci_mem_writeb, ehci_mem_writew, ehci_mem_writel }, - }, +static const MemoryRegionOps ehci_mmio_caps_ops = { + .read = ehci_caps_read, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .impl.min_access_size = 1, + .impl.max_access_size = 1, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static const MemoryRegionOps ehci_mmio_opreg_ops = { + .read = ehci_opreg_read, + .write = ehci_opreg_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static const MemoryRegionOps ehci_mmio_port_ops = { + .read = ehci_port_read, + .write = ehci_port_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, .endianness = DEVICE_LITTLE_ENDIAN, }; @@ -2562,6 +2573,32 @@ static int usb_ehci_post_load(void *opaque, int version_id) return 0; } +static void usb_ehci_vm_state_change(void *opaque, int running, RunState state) +{ + EHCIState *ehci = opaque; + + /* + * We don't migrate the EHCIQueue-s, instead we rebuild them for the + * schedule in guest memory. We must do the rebuilt ASAP, so that + * USB-devices which have async handled packages have a packet in the + * ep queue to match the completion with. + */ + if (state == RUN_STATE_RUNNING) { + ehci_advance_async_state(ehci); + } + + /* + * The schedule rebuilt from guest memory could cause the migration dest + * to miss a QH unlink, and fail to cancel packets, since the unlinked QH + * will never have existed on the destination. Therefor we must flush the + * async schedule on savevm to catch any not yet noticed unlinks. + */ + if (state == RUN_STATE_SAVE_VM) { + ehci_advance_async_state(ehci); + ehci_queues_rip_unseen(ehci, 1); + } +} + static const VMStateDescription vmstate_ehci = { .name = "ehci", .version_id = 2, @@ -2681,19 +2718,19 @@ static int usb_ehci_initfn(PCIDevice *dev) pci_conf[0x6e] = 0x00; pci_conf[0x6f] = 0xc0; // USBLEFCTLSTS - // 2.2 host controller interface version - s->mmio[0x00] = (uint8_t) OPREGBASE; - s->mmio[0x01] = 0x00; - s->mmio[0x02] = 0x00; - s->mmio[0x03] = 0x01; // HC version - s->mmio[0x04] = NB_PORTS; // Number of downstream ports - s->mmio[0x05] = 0x00; // No companion ports at present - s->mmio[0x06] = 0x00; - s->mmio[0x07] = 0x00; - s->mmio[0x08] = 0x80; // We can cache whole frame, not 64-bit capable - s->mmio[0x09] = 0x68; // EECP - s->mmio[0x0a] = 0x00; - s->mmio[0x0b] = 0x00; + /* 2.2 host controller interface version */ + s->caps[0x00] = (uint8_t) OPREGBASE; + s->caps[0x01] = 0x00; + s->caps[0x02] = 0x00; + s->caps[0x03] = 0x01; /* HC version */ + s->caps[0x04] = NB_PORTS; /* Number of downstream ports */ + s->caps[0x05] = 0x00; /* No companion ports at present */ + s->caps[0x06] = 0x00; + s->caps[0x07] = 0x00; + s->caps[0x08] = 0x80; /* We can cache whole frame, no 64-bit */ + s->caps[0x09] = 0x68; /* EECP */ + s->caps[0x0a] = 0x00; + s->caps[0x0b] = 0x00; s->irq = s->dev.irq[3]; @@ -2711,8 +2748,20 @@ static int usb_ehci_initfn(PCIDevice *dev) usb_packet_init(&s->ipacket); qemu_register_reset(ehci_reset, s); + qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); + + memory_region_init(&s->mem, "ehci", MMIO_SIZE); + memory_region_init_io(&s->mem_caps, &ehci_mmio_caps_ops, s, + "capabilities", OPREGBASE); + memory_region_init_io(&s->mem_opreg, &ehci_mmio_opreg_ops, s, + "operational", PORTSC_BEGIN - OPREGBASE); + memory_region_init_io(&s->mem_ports, &ehci_mmio_port_ops, s, + "ports", PORTSC_END - PORTSC_BEGIN); + + memory_region_add_subregion(&s->mem, 0, &s->mem_caps); + memory_region_add_subregion(&s->mem, OPREGBASE, &s->mem_opreg); + memory_region_add_subregion(&s->mem, PORTSC_BEGIN, &s->mem_ports); - memory_region_init_io(&s->mem, &ehci_mem_ops, s, "ehci", MMIO_SIZE); pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mem); return 0; diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index c7c878635a..cdc8bc3fba 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1000,6 +1000,9 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td) } assert(ret == TD_RESULT_ASYNC_START); assert(int_mask == 0); + if (ptd.ctrl & TD_CTRL_SPD) { + break; + } plink = ptd.link; } } @@ -1097,7 +1100,7 @@ static void uhci_process_frame(UHCIState *s) case TD_RESULT_ASYNC_START: trace_usb_uhci_td_async(curr_qh & ~0xf, link & ~0xf); - if (is_valid(td.link)) { + if (is_valid(td.link) && !(td.ctrl & TD_CTRL_SPD)) { uhci_fill_queue(s, &td); } link = curr_qh ? qh.link : td.link; diff --git a/hw/usb/host-linux.c b/hw/usb/host-linux.c index 8df92074d3..44f1a64b35 100644 --- a/hw/usb/host-linux.c +++ b/hw/usb/host-linux.c @@ -1045,7 +1045,6 @@ static int usb_host_handle_control(USBDevice *dev, USBPacket *p, /* Note request is (bRequestType << 8) | bRequest */ trace_usb_host_req_control(s->bus_num, s->addr, p, request, value, index); - assert(p->result == 0); switch (request) { case DeviceOutRequest | USB_REQ_SET_ADDRESS: @@ -1074,6 +1073,7 @@ static int usb_host_handle_control(USBDevice *dev, USBPacket *p, } /* The rest are asynchronous */ + assert(p && p->result == 0); if (length > sizeof(dev->data_buf)) { fprintf(stderr, "husb: ctrl buffer too small (%d > %zu)\n", diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 5301a69c4f..b10241a139 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -43,7 +43,6 @@ #define EP2I(ep_address) (((ep_address & 0x80) >> 3) | (ep_address & 0x0f)) #define I2EP(i) (((i & 0x10) << 3) | (i & 0x0f)) -typedef struct Cancelled Cancelled; typedef struct USBRedirDevice USBRedirDevice; /* Struct to hold buffered packets (iso or int input packets) */ @@ -58,6 +57,7 @@ struct endp_data { uint8_t type; uint8_t interval; uint8_t interface; /* bInterfaceNumber this ep belongs to */ + uint16_t max_packet_size; /* In bytes, not wMaxPacketSize format !! */ uint8_t iso_started; uint8_t iso_error; /* For reporting iso errors to the HC */ uint8_t interrupt_started; @@ -65,8 +65,20 @@ struct endp_data { uint8_t bufpq_prefilled; uint8_t bufpq_dropping_packets; QTAILQ_HEAD(, buf_packet) bufpq; - int bufpq_size; - int bufpq_target_size; + int32_t bufpq_size; + int32_t bufpq_target_size; +}; + +struct PacketIdQueueEntry { + uint64_t id; + QTAILQ_ENTRY(PacketIdQueueEntry)next; +}; + +struct PacketIdQueue { + USBRedirDevice *dev; + const char *name; + QTAILQ_HEAD(, PacketIdQueueEntry) head; + int size; }; struct USBRedirDevice { @@ -86,7 +98,8 @@ struct USBRedirDevice { int64_t next_attach_time; struct usbredirparser *parser; struct endp_data endpoint[MAX_ENDPOINTS]; - QTAILQ_HEAD(, Cancelled) cancelled; + struct PacketIdQueue cancelled; + struct PacketIdQueue already_in_flight; /* Data for device filtering */ struct usb_redir_device_connect_header device_info; struct usb_redir_interface_info_header interface_info; @@ -94,11 +107,6 @@ struct USBRedirDevice { int filter_rules_count; }; -struct Cancelled { - uint64_t id; - QTAILQ_ENTRY(Cancelled)next; -}; - static void usbredir_hello(void *priv, struct usb_redir_hello_header *h); static void usbredir_device_connect(void *priv, struct usb_redir_device_connect_header *device_connect); @@ -134,6 +142,8 @@ static void usbredir_interrupt_packet(void *priv, uint64_t id, static int usbredir_handle_status(USBRedirDevice *dev, int status, int actual_len); +#define VERSION "qemu usb-redir guest " QEMU_VERSION + /* * Logging stuff */ @@ -232,6 +242,11 @@ static int usbredir_write(void *priv, uint8_t *data, int count) return 0; } + /* Don't send new data to the chardev until our state is fully synced */ + if (!runstate_check(RUN_STATE_RUNNING)) { + return 0; + } + return qemu_chr_fe_write(dev->cs, data, count); } @@ -239,37 +254,103 @@ static int usbredir_write(void *priv, uint8_t *data, int count) * Cancelled and buffered packets helpers */ -static void usbredir_cancel_packet(USBDevice *udev, USBPacket *p) +static void packet_id_queue_init(struct PacketIdQueue *q, + USBRedirDevice *dev, const char *name) { - USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev); - Cancelled *c; + q->dev = dev; + q->name = name; + QTAILQ_INIT(&q->head); + q->size = 0; +} + +static void packet_id_queue_add(struct PacketIdQueue *q, uint64_t id) +{ + USBRedirDevice *dev = q->dev; + struct PacketIdQueueEntry *e; + + DPRINTF("adding packet id %"PRIu64" to %s queue\n", id, q->name); - DPRINTF("cancel packet id %"PRIu64"\n", p->id); + e = g_malloc0(sizeof(struct PacketIdQueueEntry)); + e->id = id; + QTAILQ_INSERT_TAIL(&q->head, e, next); + q->size++; +} - c = g_malloc0(sizeof(Cancelled)); - c->id = p->id; - QTAILQ_INSERT_TAIL(&dev->cancelled, c, next); +static int packet_id_queue_remove(struct PacketIdQueue *q, uint64_t id) +{ + USBRedirDevice *dev = q->dev; + struct PacketIdQueueEntry *e; + + QTAILQ_FOREACH(e, &q->head, next) { + if (e->id == id) { + DPRINTF("removing packet id %"PRIu64" from %s queue\n", + id, q->name); + QTAILQ_REMOVE(&q->head, e, next); + q->size--; + g_free(e); + return 1; + } + } + return 0; +} +static void packet_id_queue_empty(struct PacketIdQueue *q) +{ + USBRedirDevice *dev = q->dev; + struct PacketIdQueueEntry *e, *next_e; + + DPRINTF("removing %d packet-ids from %s queue\n", q->size, q->name); + + QTAILQ_FOREACH_SAFE(e, &q->head, next, next_e) { + QTAILQ_REMOVE(&q->head, e, next); + g_free(e); + } + q->size = 0; +} + +static void usbredir_cancel_packet(USBDevice *udev, USBPacket *p) +{ + USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev); + + packet_id_queue_add(&dev->cancelled, p->id); usbredirparser_send_cancel_data_packet(dev->parser, p->id); usbredirparser_do_write(dev->parser); } static int usbredir_is_cancelled(USBRedirDevice *dev, uint64_t id) { - Cancelled *c; - if (!dev->dev.attached) { return 1; /* Treat everything as cancelled after a disconnect */ } + return packet_id_queue_remove(&dev->cancelled, id); +} + +static void usbredir_fill_already_in_flight_from_ep(USBRedirDevice *dev, + struct USBEndpoint *ep) +{ + static USBPacket *p; + + QTAILQ_FOREACH(p, &ep->queue, queue) { + packet_id_queue_add(&dev->already_in_flight, p->id); + } +} - QTAILQ_FOREACH(c, &dev->cancelled, next) { - if (c->id == id) { - QTAILQ_REMOVE(&dev->cancelled, c, next); - g_free(c); - return 1; - } +static void usbredir_fill_already_in_flight(USBRedirDevice *dev) +{ + int ep; + struct USBDevice *udev = &dev->dev; + + usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_ctl); + + for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) { + usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_in[ep]); + usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_out[ep]); } - return 0; +} + +static int usbredir_already_in_flight(USBRedirDevice *dev, uint64_t id) +{ + return packet_id_queue_remove(&dev->already_in_flight, id); } static USBPacket *usbredir_find_packet_by_id(USBRedirDevice *dev, @@ -487,6 +568,10 @@ static int usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p, DPRINTF("bulk-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id); + if (usbredir_already_in_flight(dev, p->id)) { + return USB_RET_ASYNC; + } + bulk_packet.endpoint = ep; bulk_packet.length = p->iov.size; bulk_packet.stream_id = 0; @@ -567,6 +652,10 @@ static int usbredir_handle_interrupt_data(USBRedirDevice *dev, DPRINTF("interrupt-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id); + if (usbredir_already_in_flight(dev, p->id)) { + return USB_RET_ASYNC; + } + interrupt_packet.endpoint = ep; interrupt_packet.length = p->iov.size; @@ -709,6 +798,10 @@ static int usbredir_handle_control(USBDevice *udev, USBPacket *p, USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev); struct usb_redir_control_packet_header control_packet; + if (usbredir_already_in_flight(dev, p->id)) { + return USB_RET_ASYNC; + } + /* Special cases for certain standard device requests */ switch (request) { case DeviceOutRequest | USB_REQ_SET_ADDRESS: @@ -763,6 +856,7 @@ static void usbredir_chardev_close_bh(void *opaque) usbredir_device_disconnect(dev); if (dev->parser) { + DPRINTF("destroying usbredirparser\n"); usbredirparser_destroy(dev->parser); dev->parser = NULL; } @@ -771,14 +865,13 @@ static void usbredir_chardev_close_bh(void *opaque) static void usbredir_chardev_open(USBRedirDevice *dev) { uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, }; - char version[32]; + int flags = 0; /* Make sure any pending closes are handled (no-op if none pending) */ usbredir_chardev_close_bh(dev); qemu_bh_cancel(dev->chardev_close_bh); - strcpy(version, "qemu usb-redir guest "); - pstrcat(version, sizeof(version), qemu_get_version()); + DPRINTF("creating usbredirparser\n"); dev->parser = qemu_oom_check(usbredirparser_create()); dev->parser->priv = dev; @@ -807,7 +900,12 @@ static void usbredir_chardev_open(USBRedirDevice *dev) usbredirparser_caps_set_cap(caps, usb_redir_cap_filter); usbredirparser_caps_set_cap(caps, usb_redir_cap_ep_info_max_packet_size); usbredirparser_caps_set_cap(caps, usb_redir_cap_64bits_ids); - usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE, 0); + + if (runstate_check(RUN_STATE_INMIGRATE)) { + flags |= usbredirparser_fl_no_hello; + } + usbredirparser_init(dev->parser, VERSION, caps, USB_REDIR_CAPS_SIZE, + flags); usbredirparser_do_write(dev->parser); } @@ -853,6 +951,11 @@ static int usbredir_chardev_can_read(void *opaque) return 0; } + /* Don't read new data from the chardev until our state is fully synced */ + if (!runstate_check(RUN_STATE_RUNNING)) { + return 0; + } + /* usbredir_parser_do_read will consume *all* data we give it */ return 1024 * 1024; } @@ -878,9 +981,11 @@ static void usbredir_chardev_event(void *opaque, int event) switch (event) { case CHR_EVENT_OPENED: + DPRINTF("chardev open\n"); usbredir_chardev_open(dev); break; case CHR_EVENT_CLOSED: + DPRINTF("chardev close\n"); qemu_bh_schedule(dev->chardev_close_bh); break; } @@ -890,6 +995,15 @@ static void usbredir_chardev_event(void *opaque, int event) * init + destroy */ +static void usbredir_vm_state_change(void *priv, int running, RunState state) +{ + USBRedirDevice *dev = priv; + + if (state == RUN_STATE_RUNNING && dev->parser != NULL) { + usbredirparser_do_write(dev->parser); /* Flush any pending writes */ + } +} + static int usbredir_initfn(USBDevice *udev) { USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev); @@ -914,7 +1028,8 @@ static int usbredir_initfn(USBDevice *udev) dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); dev->attach_timer = qemu_new_timer_ms(vm_clock, usbredir_do_attach, dev); - QTAILQ_INIT(&dev->cancelled); + packet_id_queue_init(&dev->cancelled, dev, "cancelled"); + packet_id_queue_init(&dev->already_in_flight, dev, "already-in-flight"); for (i = 0; i < MAX_ENDPOINTS; i++) { QTAILQ_INIT(&dev->endpoint[i].bufpq); } @@ -927,19 +1042,17 @@ static int usbredir_initfn(USBDevice *udev) qemu_chr_add_handlers(dev->cs, usbredir_chardev_can_read, usbredir_chardev_read, usbredir_chardev_event, dev); + qemu_add_vm_change_state_handler(usbredir_vm_state_change, dev); add_boot_device_path(dev->bootindex, &udev->qdev, NULL); return 0; } static void usbredir_cleanup_device_queues(USBRedirDevice *dev) { - Cancelled *c, *next_c; int i; - QTAILQ_FOREACH_SAFE(c, &dev->cancelled, next, next_c) { - QTAILQ_REMOVE(&dev->cancelled, c, next); - g_free(c); - } + packet_id_queue_empty(&dev->cancelled); + packet_id_queue_empty(&dev->already_in_flight); for (i = 0; i < MAX_ENDPOINTS; i++) { usbredir_free_bufpq(dev, I2EP(i)); } @@ -1118,6 +1231,7 @@ static void usbredir_device_disconnect(void *priv) qemu_del_timer(dev->attach_timer); if (dev->dev.attached) { + DPRINTF("detaching device\n"); usb_device_detach(&dev->dev); /* * Delay next usb device attach to give the guest a chance to see @@ -1195,7 +1309,8 @@ static void usbredir_ep_info(void *priv, usb_ep->ifnum = dev->endpoint[i].interface; if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_ep_info_max_packet_size)) { - usb_ep->max_packet_size = ep_info->max_packet_size[i]; + dev->endpoint[i].max_packet_size = + usb_ep->max_packet_size = ep_info->max_packet_size[i]; } if (ep_info->type[i] == usb_redir_type_bulk) { usb_ep->pipeline = true; @@ -1418,6 +1533,322 @@ static void usbredir_interrupt_packet(void *priv, uint64_t id, } } +/* + * Migration code + */ + +static void usbredir_pre_save(void *priv) +{ + USBRedirDevice *dev = priv; + + usbredir_fill_already_in_flight(dev); +} + +static int usbredir_post_load(void *priv, int version_id) +{ + USBRedirDevice *dev = priv; + struct USBEndpoint *usb_ep; + int i; + + switch (dev->device_info.speed) { + case usb_redir_speed_low: + dev->dev.speed = USB_SPEED_LOW; + break; + case usb_redir_speed_full: + dev->dev.speed = USB_SPEED_FULL; + break; + case usb_redir_speed_high: + dev->dev.speed = USB_SPEED_HIGH; + break; + case usb_redir_speed_super: + dev->dev.speed = USB_SPEED_SUPER; + break; + default: + dev->dev.speed = USB_SPEED_FULL; + } + dev->dev.speedmask = (1 << dev->dev.speed); + + for (i = 0; i < MAX_ENDPOINTS; i++) { + usb_ep = usb_ep_get(&dev->dev, + (i & 0x10) ? USB_TOKEN_IN : USB_TOKEN_OUT, + i & 0x0f); + usb_ep->type = dev->endpoint[i].type; + usb_ep->ifnum = dev->endpoint[i].interface; + usb_ep->max_packet_size = dev->endpoint[i].max_packet_size; + if (dev->endpoint[i].type == usb_redir_type_bulk) { + usb_ep->pipeline = true; + } + } + return 0; +} + +/* For usbredirparser migration */ +static void usbredir_put_parser(QEMUFile *f, void *priv, size_t unused) +{ + USBRedirDevice *dev = priv; + uint8_t *data; + int len; + + if (dev->parser == NULL) { + qemu_put_be32(f, 0); + return; + } + + usbredirparser_serialize(dev->parser, &data, &len); + qemu_oom_check(data); + + qemu_put_be32(f, len); + qemu_put_buffer(f, data, len); + + free(data); +} + +static int usbredir_get_parser(QEMUFile *f, void *priv, size_t unused) +{ + USBRedirDevice *dev = priv; + uint8_t *data; + int len, ret; + + len = qemu_get_be32(f); + if (len == 0) { + return 0; + } + + /* + * Our chardev should be open already at this point, otherwise + * the usbredir channel will be broken (ie spice without seamless) + */ + if (dev->parser == NULL) { + ERROR("get_parser called with closed chardev, failing migration\n"); + return -1; + } + + data = g_malloc(len); + qemu_get_buffer(f, data, len); + + ret = usbredirparser_unserialize(dev->parser, data, len); + + g_free(data); + + return ret; +} + +static const VMStateInfo usbredir_parser_vmstate_info = { + .name = "usb-redir-parser", + .put = usbredir_put_parser, + .get = usbredir_get_parser, +}; + + +/* For buffered packets (iso/irq) queue migration */ +static void usbredir_put_bufpq(QEMUFile *f, void *priv, size_t unused) +{ + struct endp_data *endp = priv; + struct buf_packet *bufp; + int remain = endp->bufpq_size; + + qemu_put_be32(f, endp->bufpq_size); + QTAILQ_FOREACH(bufp, &endp->bufpq, next) { + qemu_put_be32(f, bufp->len); + qemu_put_be32(f, bufp->status); + qemu_put_buffer(f, bufp->data, bufp->len); + remain--; + } + assert(remain == 0); +} + +static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused) +{ + struct endp_data *endp = priv; + struct buf_packet *bufp; + int i; + + endp->bufpq_size = qemu_get_be32(f); + for (i = 0; i < endp->bufpq_size; i++) { + bufp = g_malloc(sizeof(struct buf_packet)); + bufp->len = qemu_get_be32(f); + bufp->status = qemu_get_be32(f); + bufp->data = qemu_oom_check(malloc(bufp->len)); /* regular malloc! */ + qemu_get_buffer(f, bufp->data, bufp->len); + QTAILQ_INSERT_TAIL(&endp->bufpq, bufp, next); + } + return 0; +} + +static const VMStateInfo usbredir_ep_bufpq_vmstate_info = { + .name = "usb-redir-bufpq", + .put = usbredir_put_bufpq, + .get = usbredir_get_bufpq, +}; + + +/* For endp_data migration */ +static const VMStateDescription usbredir_ep_vmstate = { + .name = "usb-redir-ep", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(type, struct endp_data), + VMSTATE_UINT8(interval, struct endp_data), + VMSTATE_UINT8(interface, struct endp_data), + VMSTATE_UINT16(max_packet_size, struct endp_data), + VMSTATE_UINT8(iso_started, struct endp_data), + VMSTATE_UINT8(iso_error, struct endp_data), + VMSTATE_UINT8(interrupt_started, struct endp_data), + VMSTATE_UINT8(interrupt_error, struct endp_data), + VMSTATE_UINT8(bufpq_prefilled, struct endp_data), + VMSTATE_UINT8(bufpq_dropping_packets, struct endp_data), + { + .name = "bufpq", + .version_id = 0, + .field_exists = NULL, + .size = 0, + .info = &usbredir_ep_bufpq_vmstate_info, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_INT32(bufpq_target_size, struct endp_data), + VMSTATE_END_OF_LIST() + } +}; + + +/* For PacketIdQueue migration */ +static void usbredir_put_packet_id_q(QEMUFile *f, void *priv, size_t unused) +{ + struct PacketIdQueue *q = priv; + USBRedirDevice *dev = q->dev; + struct PacketIdQueueEntry *e; + int remain = q->size; + + DPRINTF("put_packet_id_q %s size %d\n", q->name, q->size); + qemu_put_be32(f, q->size); + QTAILQ_FOREACH(e, &q->head, next) { + qemu_put_be64(f, e->id); + remain--; + } + assert(remain == 0); +} + +static int usbredir_get_packet_id_q(QEMUFile *f, void *priv, size_t unused) +{ + struct PacketIdQueue *q = priv; + USBRedirDevice *dev = q->dev; + int i, size; + uint64_t id; + + size = qemu_get_be32(f); + DPRINTF("get_packet_id_q %s size %d\n", q->name, size); + for (i = 0; i < size; i++) { + id = qemu_get_be64(f); + packet_id_queue_add(q, id); + } + assert(q->size == size); + return 0; +} + +static const VMStateInfo usbredir_ep_packet_id_q_vmstate_info = { + .name = "usb-redir-packet-id-q", + .put = usbredir_put_packet_id_q, + .get = usbredir_get_packet_id_q, +}; + +static const VMStateDescription usbredir_ep_packet_id_queue_vmstate = { + .name = "usb-redir-packet-id-queue", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + { + .name = "queue", + .version_id = 0, + .field_exists = NULL, + .size = 0, + .info = &usbredir_ep_packet_id_q_vmstate_info, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + } +}; + + +/* For usb_redir_device_connect_header migration */ +static const VMStateDescription usbredir_device_info_vmstate = { + .name = "usb-redir-device-info", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(speed, struct usb_redir_device_connect_header), + VMSTATE_UINT8(device_class, struct usb_redir_device_connect_header), + VMSTATE_UINT8(device_subclass, struct usb_redir_device_connect_header), + VMSTATE_UINT8(device_protocol, struct usb_redir_device_connect_header), + VMSTATE_UINT16(vendor_id, struct usb_redir_device_connect_header), + VMSTATE_UINT16(product_id, struct usb_redir_device_connect_header), + VMSTATE_UINT16(device_version_bcd, + struct usb_redir_device_connect_header), + VMSTATE_END_OF_LIST() + } +}; + + +/* For usb_redir_interface_info_header migration */ +static const VMStateDescription usbredir_interface_info_vmstate = { + .name = "usb-redir-interface-info", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(interface_count, + struct usb_redir_interface_info_header), + VMSTATE_UINT8_ARRAY(interface, + struct usb_redir_interface_info_header, 32), + VMSTATE_UINT8_ARRAY(interface_class, + struct usb_redir_interface_info_header, 32), + VMSTATE_UINT8_ARRAY(interface_subclass, + struct usb_redir_interface_info_header, 32), + VMSTATE_UINT8_ARRAY(interface_protocol, + struct usb_redir_interface_info_header, 32), + VMSTATE_END_OF_LIST() + } +}; + + +/* And finally the USBRedirDevice vmstate itself */ +static const VMStateDescription usbredir_vmstate = { + .name = "usb-redir", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = usbredir_pre_save, + .post_load = usbredir_post_load, + .fields = (VMStateField[]) { + VMSTATE_USB_DEVICE(dev, USBRedirDevice), + VMSTATE_TIMER(attach_timer, USBRedirDevice), + { + .name = "parser", + .version_id = 0, + .field_exists = NULL, + .size = 0, + .info = &usbredir_parser_vmstate_info, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_STRUCT_ARRAY(endpoint, USBRedirDevice, MAX_ENDPOINTS, 1, + usbredir_ep_vmstate, struct endp_data), + VMSTATE_STRUCT(cancelled, USBRedirDevice, 1, + usbredir_ep_packet_id_queue_vmstate, + struct PacketIdQueue), + VMSTATE_STRUCT(already_in_flight, USBRedirDevice, 1, + usbredir_ep_packet_id_queue_vmstate, + struct PacketIdQueue), + VMSTATE_STRUCT(device_info, USBRedirDevice, 1, + usbredir_device_info_vmstate, + struct usb_redir_device_connect_header), + VMSTATE_STRUCT(interface_info, USBRedirDevice, 1, + usbredir_interface_info_vmstate, + struct usb_redir_interface_info_header), + VMSTATE_END_OF_LIST() + } +}; + static Property usbredir_properties[] = { DEFINE_PROP_CHR("chardev", USBRedirDevice, cs), DEFINE_PROP_UINT8("debug", USBRedirDevice, debug, 0), @@ -1438,6 +1869,7 @@ static void usbredir_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = usbredir_handle_reset; uc->handle_data = usbredir_handle_data; uc->handle_control = usbredir_handle_control; + dc->vmsd = &usbredir_vmstate; dc->props = usbredir_properties; } diff --git a/hw/vga-isa-mm.c b/hw/vga-isa-mm.c index 44ae7d92c8..306e6ba443 100644 --- a/hw/vga-isa-mm.c +++ b/hw/vga-isa-mm.c @@ -107,6 +107,7 @@ static void vga_mm_init(ISAVGAMMState *s, target_phys_addr_t vram_base, s_ioport_ctrl = g_malloc(sizeof(*s_ioport_ctrl)); memory_region_init_io(s_ioport_ctrl, &vga_mm_ctrl_ops, s, "vga-mm-ctrl", 0x100000); + memory_region_set_flush_coalesced(s_ioport_ctrl); vga_io_memory = g_malloc(sizeof(*vga_io_memory)); /* XXX: endianness? */ diff --git a/hw/vga.c b/hw/vga.c index 80299ea1d4..afaef0d711 100644 --- a/hw/vga.c +++ b/hw/vga.c @@ -361,6 +361,8 @@ uint32_t vga_ioport_read(void *opaque, uint32_t addr) VGACommonState *s = opaque; int val, index; + qemu_flush_coalesced_mmio_buffer(); + if (vga_ioport_invalid(s, addr)) { val = 0xff; } else { @@ -453,6 +455,8 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val) VGACommonState *s = opaque; int index; + qemu_flush_coalesced_mmio_buffer(); + /* check port range access depending on color/monochrome mode */ if (vga_ioport_invalid(s, addr)) { return; @@ -2338,6 +2342,7 @@ MemoryRegion *vga_init_io(VGACommonState *s, vga_mem = g_malloc(sizeof(*vga_mem)); memory_region_init_io(vga_mem, &vga_mem_ops, s, "vga-lowmem", 0x20000); + memory_region_set_flush_coalesced(vga_mem); return vga_mem; } diff --git a/hw/virtio-net.c b/hw/virtio-net.c index b1998b27d3..6490743290 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -447,10 +447,6 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) VirtIONet *n = to_virtio_net(vdev); qemu_flush_queued_packets(&n->nic->nc); - - /* We now have RX buffers, signal to the IO thread to break out of the - * select to re-poll the tap file descriptor */ - qemu_notify_event(); } static int virtio_net_can_receive(NetClientState *nc) diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c index b68e88367f..e815a04e9f 100644 --- a/hw/vmware_vga.c +++ b/hw/vmware_vga.c @@ -1186,6 +1186,7 @@ static int pci_vmsvga_initfn(PCIDevice *dev) memory_region_init_io(&s->io_bar, &vmsvga_io_ops, &s->chip, "vmsvga-io", 0x10); + memory_region_set_flush_coalesced(&s->io_bar); pci_register_bar(&s->card, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_bar); vmsvga_init(&s->chip, pci_address_space(dev), diff --git a/hw/xen-host-pci-device.c b/hw/xen-host-pci-device.c index e7ff680ef2..743b37b991 100644 --- a/hw/xen-host-pci-device.c +++ b/hw/xen-host-pci-device.c @@ -47,13 +47,13 @@ static int xen_host_pci_sysfs_path(const XenHostPCIDevice *d, } -/* This size should be enough to read the first 7 lines of a ressource file */ -#define XEN_HOST_PCI_RESSOURCE_BUFFER_SIZE 400 +/* This size should be enough to read the first 7 lines of a resource file */ +#define XEN_HOST_PCI_RESOURCE_BUFFER_SIZE 400 static int xen_host_pci_get_resource(XenHostPCIDevice *d) { int i, rc, fd; char path[PATH_MAX]; - char buf[XEN_HOST_PCI_RESSOURCE_BUFFER_SIZE]; + char buf[XEN_HOST_PCI_RESOURCE_BUFFER_SIZE]; unsigned long long start, end, flags, size; char *endptr, *s; uint8_t type; diff --git a/hw/xen_nic.c b/hw/xen_nic.c index 8b79bfb73e..cf7d5591b3 100644 --- a/hw/xen_nic.c +++ b/hw/xen_nic.c @@ -415,6 +415,7 @@ static void net_event(struct XenDevice *xendev) { struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); net_tx_packets(netdev); + qemu_flush_queued_packets(&netdev->nic->nc); } static int net_free(struct XenDevice *xendev) diff --git a/hw/xen_pt.h b/hw/xen_pt.h index 41904ece93..112477a881 100644 --- a/hw/xen_pt.h +++ b/hw/xen_pt.h @@ -96,7 +96,7 @@ typedef struct XenPTRegion { * - do NOT use ALL F for init_val, otherwise the tbl will not be registered. */ -/* emulated register infomation */ +/* emulated register information */ struct XenPTRegInfo { uint32_t offset; uint32_t size; @@ -140,7 +140,7 @@ typedef int (*xen_pt_reg_size_init_fn) (XenPCIPassthroughState *, const XenPTRegGroupInfo *, uint32_t base_offset, uint8_t *size); -/* emulated register group infomation */ +/* emulated register group information */ struct XenPTRegGroupInfo { uint8_t grp_id; XenPTRegisterGroupType grp_type; diff --git a/hw/xen_pt_config_init.c b/hw/xen_pt_config_init.c index 00eb3d997d..e524a4094d 100644 --- a/hw/xen_pt_config_init.c +++ b/hw/xen_pt_config_init.c @@ -562,7 +562,7 @@ static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, return 0; } -/* Header Type0 reg static infomation table */ +/* Header Type0 reg static information table */ static XenPTRegInfo xen_pt_emu_reg_header0[] = { /* Vendor ID reg */ { @@ -753,7 +753,7 @@ static XenPTRegInfo xen_pt_emu_reg_header0[] = { * Vital Product Data Capability */ -/* Vital Product Data Capability Structure reg static infomation table */ +/* Vital Product Data Capability Structure reg static information table */ static XenPTRegInfo xen_pt_emu_reg_vpd[] = { { .offset = PCI_CAP_LIST_NEXT, @@ -775,7 +775,7 @@ static XenPTRegInfo xen_pt_emu_reg_vpd[] = { * Vendor Specific Capability */ -/* Vendor Specific Capability Structure reg static infomation table */ +/* Vendor Specific Capability Structure reg static information table */ static XenPTRegInfo xen_pt_emu_reg_vendor[] = { { .offset = PCI_CAP_LIST_NEXT, @@ -866,7 +866,7 @@ static int xen_pt_linkctrl2_reg_init(XenPCIPassthroughState *s, return 0; } -/* PCI Express Capability Structure reg static infomation table */ +/* PCI Express Capability Structure reg static information table */ static XenPTRegInfo xen_pt_emu_reg_pcie[] = { /* Next Pointer reg */ { @@ -981,7 +981,7 @@ static int xen_pt_pmcsr_reg_write(XenPCIPassthroughState *s, return 0; } -/* Power Management Capability reg static infomation table */ +/* Power Management Capability reg static information table */ static XenPTRegInfo xen_pt_emu_reg_pm[] = { /* Next Pointer reg */ { @@ -1259,7 +1259,7 @@ static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s, return 0; } -/* MSI Capability Structure reg static infomation table */ +/* MSI Capability Structure reg static information table */ static XenPTRegInfo xen_pt_emu_reg_msi[] = { /* Next Pointer reg */ { @@ -1396,7 +1396,7 @@ static int xen_pt_msixctrl_reg_write(XenPCIPassthroughState *s, return 0; } -/* MSI-X Capability Structure reg static infomation table */ +/* MSI-X Capability Structure reg static information table */ static XenPTRegInfo xen_pt_emu_reg_msix[] = { /* Next Pointer reg */ { diff --git a/hw/xilinx.h b/hw/xilinx.h index 556c5aa9f1..98300477ab 100644 --- a/hw/xilinx.h +++ b/hw/xilinx.h @@ -21,9 +21,9 @@ xilinx_timer_create(target_phys_addr_t base, qemu_irq irq, int oto, int freq) { DeviceState *dev; - dev = qdev_create(NULL, "xlnx,xps-timer"); + dev = qdev_create(NULL, "xlnx.xps-timer"); qdev_prop_set_uint32(dev, "one-timer-only", oto); - qdev_prop_set_uint32(dev, "frequency", freq); + qdev_prop_set_uint32(dev, "clock-frequency", freq); qdev_init_nofail(dev); sysbus_mmio_map(sysbus_from_qdev(dev), 0, base); sysbus_connect_irq(sysbus_from_qdev(dev), 0, irq); @@ -55,13 +55,17 @@ xilinx_axiethernet_create(NICInfo *nd, StreamSlave *peer, int txmem, int rxmem) { DeviceState *dev; + Error *errp = NULL; + qemu_check_nic_model(nd, "xlnx.axi-ethernet"); dev = qdev_create(NULL, "xlnx.axi-ethernet"); qdev_set_nic_properties(dev, nd); qdev_prop_set_uint32(dev, "rxmem", rxmem); qdev_prop_set_uint32(dev, "txmem", txmem); - object_property_set_link(OBJECT(dev), OBJECT(peer), "tx_dev", NULL); + object_property_set_link(OBJECT(dev), OBJECT(peer), "axistream-connected", + &errp); + assert_no_error(errp); qdev_init_nofail(dev); sysbus_mmio_map(sysbus_from_qdev(dev), 0, base); sysbus_connect_irq(sysbus_from_qdev(dev), 0, irq); @@ -74,8 +78,12 @@ xilinx_axiethernetdma_init(DeviceState *dev, StreamSlave *peer, target_phys_addr_t base, qemu_irq irq, qemu_irq irq2, int freqhz) { + Error *errp = NULL; + qdev_prop_set_uint32(dev, "freqhz", freqhz); - object_property_set_link(OBJECT(dev), OBJECT(peer), "tx_dev", NULL); + object_property_set_link(OBJECT(dev), OBJECT(peer), "axistream-connected", + &errp); + assert_no_error(errp); qdev_init_nofail(dev); sysbus_mmio_map(sysbus_from_qdev(dev), 0, base); diff --git a/hw/xilinx_timer.c b/hw/xilinx_timer.c index b562bd065e..2e48ca2bee 100644 --- a/hw/xilinx_timer.c +++ b/hw/xilinx_timer.c @@ -24,6 +24,7 @@ #include "sysbus.h" #include "ptimer.h" +#include "qemu-log.h" #define D(x) @@ -119,7 +120,7 @@ timer_read(void *opaque, target_phys_addr_t addr, unsigned int size) break; } - D(printf("%s timer=%d %x=%x\n", __func__, timer, addr * 4, r)); + D(fprintf(stderr, "%s timer=%d %x=%x\n", __func__, timer, addr * 4, r)); return r; } @@ -127,7 +128,7 @@ static void timer_enable(struct xlx_timer *xt) { uint64_t count; - D(printf("%s timer=%d down=%d\n", __func__, + D(fprintf(stderr, "%s timer=%d down=%d\n", __func__, xt->nr, xt->regs[R_TCSR] & TCSR_UDT)); ptimer_stop(xt->ptimer); @@ -152,7 +153,7 @@ timer_write(void *opaque, target_phys_addr_t addr, addr >>= 2; timer = timer_from_addr(addr); xt = &t->timers[timer]; - D(printf("%s addr=%x val=%x (timer=%d off=%d)\n", + D(fprintf(stderr, "%s addr=%x val=%x (timer=%d off=%d)\n", __func__, addr * 4, value, timer, addr & 3)); /* Further decoding to address a specific timers reg. */ addr &= 3; @@ -189,7 +190,7 @@ static void timer_hit(void *opaque) { struct xlx_timer *xt = opaque; struct timerblock *t = xt->parent; - D(printf("%s %d\n", __func__, timer)); + D(fprintf(stderr, "%s %d\n", __func__, xt->nr)); xt->regs[R_TCSR] |= TCSR_TINT; if (xt->regs[R_TCSR] & TCSR_ARHT) @@ -217,14 +218,15 @@ static int xilinx_timer_init(SysBusDevice *dev) ptimer_set_freq(xt->ptimer, t->freq_hz); } - memory_region_init_io(&t->mmio, &timer_ops, t, "xlnx,xps-timer", + memory_region_init_io(&t->mmio, &timer_ops, t, "xlnx.xps-timer", R_MAX * 4 * num_timers(t)); sysbus_init_mmio(dev, &t->mmio); return 0; } static Property xilinx_timer_properties[] = { - DEFINE_PROP_UINT32("frequency", struct timerblock, freq_hz, 62 * 1000000), + DEFINE_PROP_UINT32("clock-frequency", struct timerblock, freq_hz, + 62 * 1000000), DEFINE_PROP_UINT8("one-timer-only", struct timerblock, one_timer_only, 0), DEFINE_PROP_END_OF_LIST(), }; @@ -239,7 +241,7 @@ static void xilinx_timer_class_init(ObjectClass *klass, void *data) } static TypeInfo xilinx_timer_info = { - .name = "xlnx,xps-timer", + .name = "xlnx.xps-timer", .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(struct timerblock), .class_init = xilinx_timer_class_init, diff --git a/iohandler.c b/iohandler.c index dea43552d2..a2d871bb91 100644 --- a/iohandler.c +++ b/iohandler.c @@ -56,6 +56,8 @@ int qemu_set_fd_handler2(int fd, { IOHandlerRecord *ioh; + assert(fd >= 0); + if (!fd_read && !fd_write) { QLIST_FOREACH(ioh, &io_handlers, next) { if (ioh->fd == fd) { diff --git a/kvm-all.c b/kvm-all.c index 39cff55f5b..92a71374ed 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -92,7 +92,7 @@ struct KVMState /* The man page (and posix) say ioctl numbers are signed int, but * they're not. Linux, glibc and *BSD all treat ioctl numbers as * unsigned, and treating them as signed here can break things */ - unsigned irqchip_inject_ioctl; + unsigned irq_set_ioctl; #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing *irq_routes; int nr_allocated_irq_routes; @@ -870,13 +870,13 @@ int kvm_set_irq(KVMState *s, int irq, int level) event.level = level; event.irq = irq; - ret = kvm_vm_ioctl(s, s->irqchip_inject_ioctl, &event); + ret = kvm_vm_ioctl(s, s->irq_set_ioctl, &event); if (ret < 0) { perror("kvm_set_irq"); abort(); } - return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status; + return (s->irq_set_ioctl == KVM_IRQ_LINE) ? 1 : event.status; } #ifdef KVM_CAP_IRQ_ROUTING @@ -1237,10 +1237,6 @@ static int kvm_irqchip_create(KVMState *s) return ret; } - s->irqchip_inject_ioctl = KVM_IRQ_LINE; - if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) { - s->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; - } kvm_kernel_irqchip = true; /* If we have an in-kernel IRQ chip then we must have asynchronous * interrupt delivery (though the reverse is not necessarily true) @@ -1389,6 +1385,11 @@ int kvm_init(void) s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3); + s->irq_set_ioctl = KVM_IRQ_LINE; + if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) { + s->irq_set_ioctl = KVM_IRQ_LINE_STATUS; + } + ret = kvm_arch_init(s); if (ret < 0) { goto err; @@ -1409,13 +1410,11 @@ int kvm_init(void) return 0; err: - if (s) { - if (s->vmfd >= 0) { - close(s->vmfd); - } - if (s->fd != -1) { - close(s->fd); - } + if (s->vmfd >= 0) { + close(s->vmfd); + } + if (s->fd != -1) { + close(s->fd); } g_free(s); @@ -1576,8 +1575,6 @@ int kvm_cpu_exec(CPUArchState *env) qemu_mutex_lock_iothread(); kvm_arch_post_run(env, run); - kvm_flush_coalesced_mmio_buffer(); - if (run_ret < 0) { if (run_ret == -EINTR || run_ret == -EAGAIN) { DPRINTF("io window exit\n"); diff --git a/linux-user/main.c b/linux-user/main.c index 1a1c661ee4..e84a18c33c 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -89,19 +89,6 @@ int cpu_get_pic_interrupt(CPUX86State *env) } #endif -/* timers for rdtsc */ - -#if 0 - -static uint64_t emu_time; - -int64_t cpu_get_real_ticks(void) -{ - return emu_time++; -} - -#endif - #if defined(CONFIG_USE_NPTL) /***********************************************************/ /* Helper routines for implementing atomic operations. */ diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 69b27d7146..fc4cc00b9f 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -289,46 +289,29 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size) * struct has been locked - usually with lock_user_struct(). */ #define __put_user(x, hptr)\ -({\ +({ __typeof(*hptr) pu_ = (x);\ switch(sizeof(*hptr)) {\ - case 1:\ - *(uint8_t *)(hptr) = (uint8_t)(typeof(*hptr))(x);\ - break;\ - case 2:\ - *(uint16_t *)(hptr) = tswap16((uint16_t)(typeof(*hptr))(x));\ - break;\ - case 4:\ - *(uint32_t *)(hptr) = tswap32((uint32_t)(typeof(*hptr))(x));\ - break;\ - case 8:\ - *(uint64_t *)(hptr) = tswap64((typeof(*hptr))(x));\ - break;\ - default:\ - abort();\ + case 1: break;\ + case 2: pu_ = tswap16(pu_); break; \ + case 4: pu_ = tswap32(pu_); break; \ + case 8: pu_ = tswap64(pu_); break; \ + default: abort();\ }\ + memcpy(hptr, &pu_, sizeof(pu_)); \ 0;\ }) #define __get_user(x, hptr) \ -({\ +({ __typeof(*hptr) gu_; \ + memcpy(&gu_, hptr, sizeof(gu_)); \ switch(sizeof(*hptr)) {\ - case 1:\ - x = (typeof(*hptr))*(uint8_t *)(hptr);\ - break;\ - case 2:\ - x = (typeof(*hptr))tswap16(*(uint16_t *)(hptr));\ - break;\ - case 4:\ - x = (typeof(*hptr))tswap32(*(uint32_t *)(hptr));\ - break;\ - case 8:\ - x = (typeof(*hptr))tswap64(*(uint64_t *)(hptr));\ - break;\ - default:\ - /* avoid warning */\ - x = 0;\ - abort();\ + case 1: break; \ + case 2: gu_ = tswap16(gu_); break; \ + case 4: gu_ = tswap32(gu_); break; \ + case 8: gu_ = tswap64(gu_); break; \ + default: abort();\ }\ + (x) = gu_; \ 0;\ }) diff --git a/memory.c b/memory.c index d528d1f7f3..4f3ade06dd 100644 --- a/memory.c +++ b/memory.c @@ -24,7 +24,6 @@ #include "exec-obsolete.h" unsigned memory_region_transaction_depth = 0; -static bool memory_region_update_pending = false; static bool global_dirty_log = false; static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners @@ -311,6 +310,9 @@ static void memory_region_read_accessor(void *opaque, MemoryRegion *mr = opaque; uint64_t tmp; + if (mr->flush_coalesced_mmio) { + qemu_flush_coalesced_mmio_buffer(); + } tmp = mr->ops->read(mr->opaque, addr, size); *value |= (tmp & mask) << shift; } @@ -325,6 +327,9 @@ static void memory_region_write_accessor(void *opaque, MemoryRegion *mr = opaque; uint64_t tmp; + if (mr->flush_coalesced_mmio) { + qemu_flush_coalesced_mmio_buffer(); + } tmp = (*value >> shift) & mask; mr->ops->write(mr->opaque, addr, tmp, size); } @@ -726,33 +731,9 @@ static void address_space_update_topology(AddressSpace *as) address_space_update_ioeventfds(as); } -static void memory_region_update_topology(MemoryRegion *mr) -{ - if (memory_region_transaction_depth) { - memory_region_update_pending |= !mr || mr->enabled; - return; - } - - if (mr && !mr->enabled) { - return; - } - - MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); - - if (address_space_memory.root) { - address_space_update_topology(&address_space_memory); - } - if (address_space_io.root) { - address_space_update_topology(&address_space_io); - } - - MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); - - memory_region_update_pending = false; -} - void memory_region_transaction_begin(void) { + qemu_flush_coalesced_mmio_buffer(); ++memory_region_transaction_depth; } @@ -760,8 +741,17 @@ void memory_region_transaction_commit(void) { assert(memory_region_transaction_depth); --memory_region_transaction_depth; - if (!memory_region_transaction_depth && memory_region_update_pending) { - memory_region_update_topology(NULL); + if (!memory_region_transaction_depth) { + MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); + + if (address_space_memory.root) { + address_space_update_topology(&address_space_memory); + } + if (address_space_io.root) { + address_space_update_topology(&address_space_io); + } + + MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); } } @@ -826,6 +816,7 @@ void memory_region_init(MemoryRegion *mr, mr->dirty_log_mask = 0; mr->ioeventfd_nb = 0; mr->ioeventfds = NULL; + mr->flush_coalesced_mmio = false; } static bool memory_region_access_valid(MemoryRegion *mr, @@ -1069,8 +1060,9 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) { uint8_t mask = 1 << client; + memory_region_transaction_begin(); mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask); - memory_region_update_topology(mr); + memory_region_transaction_commit(); } bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr, @@ -1103,16 +1095,18 @@ void memory_region_sync_dirty_bitmap(MemoryRegion *mr) void memory_region_set_readonly(MemoryRegion *mr, bool readonly) { if (mr->readonly != readonly) { + memory_region_transaction_begin(); mr->readonly = readonly; - memory_region_update_topology(mr); + memory_region_transaction_commit(); } } void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable) { if (mr->readable != readable) { + memory_region_transaction_begin(); mr->readable = readable; - memory_region_update_topology(mr); + memory_region_transaction_commit(); } } @@ -1176,12 +1170,16 @@ void memory_region_add_coalescing(MemoryRegion *mr, cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size)); QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link); memory_region_update_coalesced_range(mr); + memory_region_set_flush_coalesced(mr); } void memory_region_clear_coalescing(MemoryRegion *mr) { CoalescedMemoryRange *cmr; + qemu_flush_coalesced_mmio_buffer(); + mr->flush_coalesced_mmio = false; + while (!QTAILQ_EMPTY(&mr->coalesced)) { cmr = QTAILQ_FIRST(&mr->coalesced); QTAILQ_REMOVE(&mr->coalesced, cmr, link); @@ -1190,6 +1188,19 @@ void memory_region_clear_coalescing(MemoryRegion *mr) memory_region_update_coalesced_range(mr); } +void memory_region_set_flush_coalesced(MemoryRegion *mr) +{ + mr->flush_coalesced_mmio = true; +} + +void memory_region_clear_flush_coalesced(MemoryRegion *mr) +{ + qemu_flush_coalesced_mmio_buffer(); + if (QTAILQ_EMPTY(&mr->coalesced)) { + mr->flush_coalesced_mmio = false; + } +} + void memory_region_add_eventfd(MemoryRegion *mr, target_phys_addr_t addr, unsigned size, @@ -1206,6 +1217,7 @@ void memory_region_add_eventfd(MemoryRegion *mr, }; unsigned i; + memory_region_transaction_begin(); for (i = 0; i < mr->ioeventfd_nb; ++i) { if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) { break; @@ -1217,7 +1229,7 @@ void memory_region_add_eventfd(MemoryRegion *mr, memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i], sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i)); mr->ioeventfds[i] = mrfd; - memory_region_update_topology(mr); + memory_region_transaction_commit(); } void memory_region_del_eventfd(MemoryRegion *mr, @@ -1236,6 +1248,7 @@ void memory_region_del_eventfd(MemoryRegion *mr, }; unsigned i; + memory_region_transaction_begin(); for (i = 0; i < mr->ioeventfd_nb; ++i) { if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) { break; @@ -1247,7 +1260,7 @@ void memory_region_del_eventfd(MemoryRegion *mr, --mr->ioeventfd_nb; mr->ioeventfds = g_realloc(mr->ioeventfds, sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1); - memory_region_update_topology(mr); + memory_region_transaction_commit(); } static void memory_region_add_subregion_common(MemoryRegion *mr, @@ -1256,6 +1269,8 @@ static void memory_region_add_subregion_common(MemoryRegion *mr, { MemoryRegion *other; + memory_region_transaction_begin(); + assert(!subregion->parent); subregion->parent = mr; subregion->addr = offset; @@ -1288,7 +1303,7 @@ static void memory_region_add_subregion_common(MemoryRegion *mr, } QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link); done: - memory_region_update_topology(mr); + memory_region_transaction_commit(); } @@ -1314,10 +1329,11 @@ void memory_region_add_subregion_overlap(MemoryRegion *mr, void memory_region_del_subregion(MemoryRegion *mr, MemoryRegion *subregion) { + memory_region_transaction_begin(); assert(subregion->parent == mr); subregion->parent = NULL; QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link); - memory_region_update_topology(mr); + memory_region_transaction_commit(); } void memory_region_set_enabled(MemoryRegion *mr, bool enabled) @@ -1325,8 +1341,9 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled) if (enabled == mr->enabled) { return; } + memory_region_transaction_begin(); mr->enabled = enabled; - memory_region_update_topology(NULL); + memory_region_transaction_commit(); } void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr) @@ -1352,16 +1369,15 @@ void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr) void memory_region_set_alias_offset(MemoryRegion *mr, target_phys_addr_t offset) { - target_phys_addr_t old_offset = mr->alias_offset; - assert(mr->alias); - mr->alias_offset = offset; - if (offset == old_offset || !mr->parent) { + if (offset == mr->alias_offset) { return; } - memory_region_update_topology(mr); + memory_region_transaction_begin(); + mr->alias_offset = offset; + memory_region_transaction_commit(); } ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr) @@ -1493,14 +1509,16 @@ void memory_listener_unregister(MemoryListener *listener) void set_system_memory_map(MemoryRegion *mr) { + memory_region_transaction_begin(); address_space_memory.root = mr; - memory_region_update_topology(NULL); + memory_region_transaction_commit(); } void set_system_io_map(MemoryRegion *mr) { + memory_region_transaction_begin(); address_space_io.root = mr; - memory_region_update_topology(NULL); + memory_region_transaction_commit(); } uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size) diff --git a/memory.h b/memory.h index bd1bbaeabe..37ce1517ca 100644 --- a/memory.h +++ b/memory.h @@ -133,6 +133,7 @@ struct MemoryRegion { bool enabled; bool rom_device; bool warning_printed; /* For reservations */ + bool flush_coalesced_mmio; MemoryRegion *alias; target_phys_addr_t alias_offset; unsigned priority; @@ -252,9 +253,9 @@ void memory_region_init_ram(MemoryRegion *mr, uint64_t size); /** - * memory_region_init_ram: Initialize RAM memory region from a user-provided. - * pointer. Accesses into the region will modify - * memory directly. + * memory_region_init_ram_ptr: Initialize RAM memory region from a + * user-provided pointer. Accesses into the + * region will modify memory directly. * * @mr: the #MemoryRegion to be initialized. * @name: the name of the region. @@ -521,6 +522,31 @@ void memory_region_add_coalescing(MemoryRegion *mr, void memory_region_clear_coalescing(MemoryRegion *mr); /** + * memory_region_set_flush_coalesced: Enforce memory coalescing flush before + * accesses. + * + * Ensure that pending coalesced MMIO request are flushed before the memory + * region is accessed. This property is automatically enabled for all regions + * passed to memory_region_set_coalescing() and memory_region_add_coalescing(). + * + * @mr: the memory region to be updated. + */ +void memory_region_set_flush_coalesced(MemoryRegion *mr); + +/** + * memory_region_clear_flush_coalesced: Disable memory coalescing flush before + * accesses. + * + * Clear the automatic coalesced MMIO flushing enabled via + * memory_region_set_flush_coalesced. Note that this service has no effect on + * memory regions that have MMIO coalescing enabled for themselves. For them, + * automatic flushing will stop once coalescing is disabled. + * + * @mr: the memory region to be updated. + */ +void memory_region_clear_flush_coalesced(MemoryRegion *mr); + +/** * memory_region_add_eventfd: Request an eventfd to be triggered when a word * is written to a location. * @@ -581,7 +607,8 @@ void memory_region_add_subregion(MemoryRegion *mr, target_phys_addr_t offset, MemoryRegion *subregion); /** - * memory_region_add_subregion: Add a subregion to a container, with overlap. + * memory_region_add_subregion_overlap: Add a subregion to a container + * with overlap. * * Adds a subregion at @offset. The subregion may overlap with other * subregions. Conflicts are resolved by having a higher @priority hide a @@ -743,7 +770,7 @@ void memory_listener_unregister(MemoryListener *listener); void memory_global_dirty_log_start(void); /** - * memory_global_dirty_log_stop: begin dirty logging for all regions + * memory_global_dirty_log_stop: end dirty logging for all regions */ void memory_global_dirty_log_stop(void); diff --git a/net.c b/net.c index e5d25d4b6d..a187a7b3db 100644 --- a/net.c +++ b/net.c @@ -357,7 +357,12 @@ void qemu_flush_queued_packets(NetClientState *nc) { nc->receive_disabled = 0; - qemu_net_queue_flush(nc->send_queue); + if (qemu_net_queue_flush(nc->send_queue)) { + /* We emptied the queue successfully, signal to the IO thread to repoll + * the file descriptor (for tap, for example). + */ + qemu_notify_event(); + } } static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, @@ -418,16 +423,27 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, void *opaque) { NetClientState *nc = opaque; + int ret; if (nc->link_down) { return iov_size(iov, iovcnt); } + if (nc->receive_disabled) { + return 0; + } + if (nc->info->receive_iov) { - return nc->info->receive_iov(nc, iov, iovcnt); + ret = nc->info->receive_iov(nc, iov, iovcnt); } else { - return nc_sendv_compat(nc, iov, iovcnt); + ret = nc_sendv_compat(nc, iov, iovcnt); } + + if (ret == 0) { + nc->receive_disabled = 1; + } + + return ret; } ssize_t qemu_sendv_packet_async(NetClientState *sender, diff --git a/net/hub.c b/net/hub.c index ac157e32ee..650a8b4a40 100644 --- a/net/hub.c +++ b/net/hub.c @@ -97,12 +97,12 @@ static int net_hub_port_can_receive(NetClientState *nc) continue; } - if (!qemu_can_send_packet(&port->nc)) { - return 0; + if (qemu_can_send_packet(&port->nc)) { + return 1; } } - return 1; + return 0; } static ssize_t net_hub_port_receive(NetClientState *nc, diff --git a/net/queue.c b/net/queue.c index e8030aafe4..254f28013a 100644 --- a/net/queue.c +++ b/net/queue.c @@ -83,12 +83,12 @@ void qemu_del_net_queue(NetQueue *queue) g_free(queue); } -static ssize_t qemu_net_queue_append(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const uint8_t *buf, - size_t size, - NetPacketSent *sent_cb) +static void qemu_net_queue_append(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const uint8_t *buf, + size_t size, + NetPacketSent *sent_cb) { NetPacket *packet; @@ -100,16 +100,14 @@ static ssize_t qemu_net_queue_append(NetQueue *queue, memcpy(packet->data, buf, size); QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); - - return size; } -static ssize_t qemu_net_queue_append_iov(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const struct iovec *iov, - int iovcnt, - NetPacketSent *sent_cb) +static void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { NetPacket *packet; size_t max_len = 0; @@ -133,8 +131,6 @@ static ssize_t qemu_net_queue_append_iov(NetQueue *queue, } QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); - - return packet->size; } static ssize_t qemu_net_queue_deliver(NetQueue *queue, @@ -177,7 +173,8 @@ ssize_t qemu_net_queue_send(NetQueue *queue, ssize_t ret; if (queue->delivering || !qemu_can_send_packet(sender)) { - return qemu_net_queue_append(queue, sender, flags, data, size, sent_cb); + qemu_net_queue_append(queue, sender, flags, data, size, sent_cb); + return 0; } ret = qemu_net_queue_deliver(queue, sender, flags, data, size); @@ -201,8 +198,8 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue, ssize_t ret; if (queue->delivering || !qemu_can_send_packet(sender)) { - return qemu_net_queue_append_iov(queue, sender, flags, - iov, iovcnt, sent_cb); + qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb); + return 0; } ret = qemu_net_queue_deliver_iov(queue, sender, flags, iov, iovcnt); @@ -228,7 +225,7 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from) } } -void qemu_net_queue_flush(NetQueue *queue) +bool qemu_net_queue_flush(NetQueue *queue) { while (!QTAILQ_EMPTY(&queue->packets)) { NetPacket *packet; @@ -244,7 +241,7 @@ void qemu_net_queue_flush(NetQueue *queue) packet->size); if (ret == 0) { QTAILQ_INSERT_HEAD(&queue->packets, packet, entry); - break; + return false; } if (packet->sent_cb) { @@ -253,4 +250,5 @@ void qemu_net_queue_flush(NetQueue *queue) g_free(packet); } + return true; } diff --git a/net/queue.h b/net/queue.h index 9d44a9b3b8..fc02b33915 100644 --- a/net/queue.h +++ b/net/queue.h @@ -53,6 +53,6 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue, NetPacketSent *sent_cb); void qemu_net_queue_purge(NetQueue *queue, NetClientState *from); -void qemu_net_queue_flush(NetQueue *queue); +bool qemu_net_queue_flush(NetQueue *queue); #endif /* QEMU_NET_QUEUE_H */ diff --git a/net/socket.c b/net/socket.c index 7c602e4c3a..5e0c92e062 100644 --- a/net/socket.c +++ b/net/socket.c @@ -32,6 +32,7 @@ #include "qemu-error.h" #include "qemu-option.h" #include "qemu_socket.h" +#include "iov.h" typedef struct NetSocketState { NetClientState nc; @@ -40,29 +41,106 @@ typedef struct NetSocketState { int state; /* 0 = getting length, 1 = getting data */ unsigned int index; unsigned int packet_len; + unsigned int send_index; /* number of bytes sent (only SOCK_STREAM) */ uint8_t buf[4096]; struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */ + IOHandler *send_fn; /* differs between SOCK_STREAM/SOCK_DGRAM */ + bool read_poll; /* waiting to receive data? */ + bool write_poll; /* waiting to transmit data? */ } NetSocketState; static void net_socket_accept(void *opaque); +static void net_socket_writable(void *opaque); + +/* Only read packets from socket when peer can receive them */ +static int net_socket_can_send(void *opaque) +{ + NetSocketState *s = opaque; + + return qemu_can_send_packet(&s->nc); +} + +static void net_socket_update_fd_handler(NetSocketState *s) +{ + qemu_set_fd_handler2(s->fd, + s->read_poll ? net_socket_can_send : NULL, + s->read_poll ? s->send_fn : NULL, + s->write_poll ? net_socket_writable : NULL, + s); +} + +static void net_socket_read_poll(NetSocketState *s, bool enable) +{ + s->read_poll = enable; + net_socket_update_fd_handler(s); +} + +static void net_socket_write_poll(NetSocketState *s, bool enable) +{ + s->write_poll = enable; + net_socket_update_fd_handler(s); +} + +static void net_socket_writable(void *opaque) +{ + NetSocketState *s = opaque; + + net_socket_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} -/* XXX: we consider we can send the whole packet without blocking */ static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size) { NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); - uint32_t len; - len = htonl(size); - - send_all(s->fd, (const uint8_t *)&len, sizeof(len)); - return send_all(s->fd, buf, size); + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - s->send_index; + ret = iov_send(s->fd, iov, 2, s->send_index, remaining); + + if (ret == -1 && errno == EAGAIN) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + s->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + s->send_index += ret; + net_socket_write_poll(s, true); + return 0; + } + s->send_index = 0; + return size; } static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size) { NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); + ssize_t ret; + + do { + ret = sendto(s->fd, buf, size, 0, + (struct sockaddr *)&s->dgram_dst, + sizeof(s->dgram_dst)); + } while (ret == -1 && errno == EINTR); - return sendto(s->fd, (const void *)buf, size, 0, - (struct sockaddr *)&s->dgram_dst, sizeof(s->dgram_dst)); + if (ret == -1 && errno == EAGAIN) { + net_socket_write_poll(s, true); + return 0; + } + return ret; } static void net_socket_send(void *opaque) @@ -81,7 +159,8 @@ static void net_socket_send(void *opaque) } else if (size == 0) { /* end of connection */ eoc: - qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); if (s->listen_fd != -1) { qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); } @@ -152,7 +231,8 @@ static void net_socket_send_dgram(void *opaque) return; if (size == 0) { /* end of connection */ - qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); return; } qemu_send_packet(&s->nc, s->buf, size); @@ -243,7 +323,8 @@ static void net_socket_cleanup(NetClientState *nc) { NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); if (s->fd != -1) { - qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); close(s->fd); s->fd = -1; } @@ -314,8 +395,8 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, s->fd = fd; s->listen_fd = -1; - - qemu_set_fd_handler(s->fd, net_socket_send_dgram, NULL, s); + s->send_fn = net_socket_send_dgram; + net_socket_read_poll(s, true); /* mcast: save bound address as dst */ if (is_connected) { @@ -332,7 +413,8 @@ err: static void net_socket_connect(void *opaque) { NetSocketState *s = opaque; - qemu_set_fd_handler(s->fd, net_socket_send, NULL, s); + s->send_fn = net_socket_send; + net_socket_read_poll(s, true); } static NetClientInfo net_socket_info = { diff --git a/oslib-win32.c b/oslib-win32.c index ffbc6d0c9f..51b33e8b20 100644 --- a/oslib-win32.c +++ b/oslib-win32.c @@ -74,6 +74,30 @@ void qemu_vfree(void *ptr) VirtualFree(ptr, 0, MEM_RELEASE); } +/* FIXME: add proper locking */ +struct tm *gmtime_r(const time_t *timep, struct tm *result) +{ + struct tm *p = gmtime(timep); + memset(result, 0, sizeof(*result)); + if (p) { + *result = *p; + p = result; + } + return p; +} + +/* FIXME: add proper locking */ +struct tm *localtime_r(const time_t *timep, struct tm *result) +{ + struct tm *p = localtime(timep); + memset(result, 0, sizeof(*result)); + if (p) { + *result = *p; + p = result; + } + return p; +} + void socket_set_block(int fd) { unsigned long opt = 0; diff --git a/qapi-schema-guest.json b/qapi-schema-guest.json index d955cf11fb..ed0eb698c6 100644 --- a/qapi-schema-guest.json +++ b/qapi-schema-guest.json @@ -293,7 +293,7 @@ ## # @GuestFsFreezeStatus # -# An enumation of filesystem freeze states +# An enumeration of filesystem freeze states # # @thawed: filesystems thawed/unfrozen # diff --git a/qapi-schema.json b/qapi-schema.json index a9f465a9ff..14e44199b7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -118,7 +118,7 @@ ## # @RunState # -# An enumation of VM run states. +# An enumeration of VM run states. # # @debug: QEMU is running on a debugger # @@ -156,6 +156,70 @@ 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] } ## +# @SnapshotInfo +# +# @id: unique snapshot id +# +# @name: user chosen name +# +# @vm-state-size: size of the VM state +# +# @date-sec: UTC date of the snapshot in seconds +# +# @date-nsec: fractional part in nano seconds to be used with date-sec +# +# @vm-clock-sec: VM clock relative to boot in seconds +# +# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec +# +# Since: 1.3 +# +## + +{ 'type': 'SnapshotInfo', + 'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int', + 'date-sec': 'int', 'date-nsec': 'int', + 'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } } + +## +# @ImageInfo: +# +# Information about a QEMU image file +# +# @filename: name of the image file +# +# @format: format of the image file +# +# @virtual-size: maximum capacity in bytes of the image +# +# @actual-size: #optional actual size on disk in bytes of the image +# +# @dirty-flag: #optional true if image is not cleanly closed +# +# @cluster-size: #optional size of a cluster in bytes +# +# @encrypted: #optional true if the image is encrypted +# +# @backing-filename: #optional name of the backing file +# +# @full-backing-filename: #optional full path of the backing file +# +# @backing-filename-format: #optional the format of the backing file +# +# @snapshots: #optional list of VM snapshots +# +# Since: 1.3 +# +## + +{ 'type': 'ImageInfo', + 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', + '*actual-size': 'int', 'virtual-size': 'int', + '*cluster-size': 'int', '*encrypted': 'bool', + '*backing-filename': 'str', '*full-backing-filename': 'str', + '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } + +## # @StatusInfo: # # Information about VCPU run state @@ -785,7 +849,7 @@ ## # @SpiceQueryMouseMode # -# An enumation of Spice mouse states. +# An enumeration of Spice mouse states. # # @client: Mouse cursor position is determined by the client. # diff --git a/qemu-char.c b/qemu-char.c index 767da93862..7f0f895157 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2141,18 +2141,13 @@ typedef struct { static void tcp_chr_accept(void *opaque); -static void tcp_chr_connect(void *opaque); - static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { TCPCharDriver *s = chr->opaque; if (s->connected) { return send_all(s->fd, buf, len); - } else if (s->listen_fd == -1) { - /* (Re-)connect for unconnected writing */ - tcp_chr_connect(chr); - return 0; } else { + /* XXX: indicate an error ? */ return len; } } @@ -2334,8 +2329,10 @@ static void tcp_chr_connect(void *opaque) TCPCharDriver *s = chr->opaque; s->connected = 1; - qemu_set_fd_handler2(s->fd, tcp_chr_read_poll, - tcp_chr_read, NULL, chr); + if (s->fd >= 0) { + qemu_set_fd_handler2(s->fd, tcp_chr_read_poll, + tcp_chr_read, NULL, chr); + } qemu_chr_generic_open(chr); } diff --git a/qemu-config.c b/qemu-config.c index eba977ebca..12eafbb4f5 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -615,6 +615,10 @@ static QemuOptsList qemu_machine_opts = { .name = "dump-guest-core", .type = QEMU_OPT_BOOL, .help = "Include guest memory in a core dump", + }, { + .name = "mem-merge", + .type = QEMU_OPT_BOOL, + .help = "enable/disable memory merge support", }, { /* End of list */ } }, diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 39419a0314..0ef82e9ac7 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ STEXI ETEXI DEF("info", img_info, - "info [-f fmt] filename") + "info [-f fmt] [--output=ofmt] filename") STEXI -@item info [-f @var{fmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} ETEXI DEF("snapshot", img_snapshot, diff --git a/qemu-img.c b/qemu-img.c index b41e670a61..f17f1872d6 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -21,12 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qapi-visit.h" +#include "qapi/qmp-output-visitor.h" +#include "qjson.h" #include "qemu-common.h" #include "qemu-option.h" #include "qemu-error.h" #include "osdep.h" #include "sysemu.h" #include "block_int.h" +#include <getopt.h> #include <stdio.h> #ifdef _WIN32 @@ -84,12 +88,13 @@ static void help(void) " '-p' show progress of command (only certain commands)\n" " '-S' indicates the consecutive number of bytes that must contain only zeros\n" " for qemu-img to create a sparse image during conversion\n" + " '--output' takes the format in which the output must be done (human or json)\n" "\n" "Parameters to check subcommand:\n" " '-r' tries to repair any inconsistencies that are found during the check.\n" " '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n" " kinds of errors, with a higher risk of choosing the wrong fix or\n" - " hiding corruption that has already occured.\n" + " hiding corruption that has already occurred.\n" "\n" "Parameters to snapshot subcommand:\n" " 'snapshot' is the name of the snapshot to create, apply or delete\n" @@ -221,7 +226,8 @@ static int print_block_option_help(const char *filename, const char *fmt) static BlockDriverState *bdrv_new_open(const char *filename, const char *fmt, - int flags) + int flags, + bool require_io) { BlockDriverState *bs; BlockDriver *drv; @@ -246,7 +252,7 @@ static BlockDriverState *bdrv_new_open(const char *filename, goto fail; } - if (bdrv_is_encrypted(bs)) { + if (bdrv_is_encrypted(bs) && require_io) { printf("Disk image '%s' is encrypted.\n", filename); if (read_password(password, sizeof(password)) < 0) { error_report("No password given"); @@ -413,7 +419,7 @@ static int img_check(int argc, char **argv) } filename = argv[optind++]; - bs = bdrv_new_open(filename, fmt, flags); + bs = bdrv_new_open(filename, fmt, flags, true); if (!bs) { return 1; } @@ -520,7 +526,7 @@ static int img_commit(int argc, char **argv) return -1; } - bs = bdrv_new_open(filename, fmt, flags); + bs = bdrv_new_open(filename, fmt, flags, true); if (!bs) { return 1; } @@ -762,7 +768,7 @@ static int img_convert(int argc, char **argv) total_sectors = 0; for (bs_i = 0; bs_i < bs_n; bs_i++) { - bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, BDRV_O_FLAGS); + bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, BDRV_O_FLAGS, true); if (!bs[bs_i]) { error_report("Could not open '%s'", argv[optind + bs_i]); ret = -1; @@ -881,7 +887,7 @@ static int img_convert(int argc, char **argv) return -1; } - out_bs = bdrv_new_open(out_filename, out_fmt, flags); + out_bs = bdrv_new_open(out_filename, out_fmt, flags, true); if (!out_bs) { ret = -1; goto out; @@ -1102,21 +1108,174 @@ static void dump_snapshots(BlockDriverState *bs) g_free(sn_tab); } -static int img_info(int argc, char **argv) +static void collect_snapshots(BlockDriverState *bs , ImageInfo *info) +{ + int i, sn_count; + QEMUSnapshotInfo *sn_tab = NULL; + SnapshotInfoList *info_list, *cur_item = NULL; + sn_count = bdrv_snapshot_list(bs, &sn_tab); + + for (i = 0; i < sn_count; i++) { + info->has_snapshots = true; + info_list = g_new0(SnapshotInfoList, 1); + + info_list->value = g_new0(SnapshotInfo, 1); + info_list->value->id = g_strdup(sn_tab[i].id_str); + info_list->value->name = g_strdup(sn_tab[i].name); + info_list->value->vm_state_size = sn_tab[i].vm_state_size; + info_list->value->date_sec = sn_tab[i].date_sec; + info_list->value->date_nsec = sn_tab[i].date_nsec; + info_list->value->vm_clock_sec = sn_tab[i].vm_clock_nsec / 1000000000; + info_list->value->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000; + + /* XXX: waiting for the qapi to support qemu-queue.h types */ + if (!cur_item) { + info->snapshots = cur_item = info_list; + } else { + cur_item->next = info_list; + cur_item = info_list; + } + + } + + g_free(sn_tab); +} + +static void dump_json_image_info(ImageInfo *info) +{ + Error *errp = NULL; + QString *str; + QmpOutputVisitor *ov = qmp_output_visitor_new(); + QObject *obj; + visit_type_ImageInfo(qmp_output_get_visitor(ov), + &info, NULL, &errp); + obj = qmp_output_get_qobject(ov); + str = qobject_to_json_pretty(obj); + assert(str != NULL); + printf("%s\n", qstring_get_str(str)); + qobject_decref(obj); + qmp_output_visitor_cleanup(ov); + QDECREF(str); +} + +static void collect_image_info(BlockDriverState *bs, + ImageInfo *info, + const char *filename, + const char *fmt) { - int c; - const char *filename, *fmt; - BlockDriverState *bs; - char size_buf[128], dsize_buf[128]; uint64_t total_sectors; - int64_t allocated_size; char backing_filename[1024]; char backing_filename2[1024]; BlockDriverInfo bdi; + bdrv_get_geometry(bs, &total_sectors); + + info->filename = g_strdup(filename); + info->format = g_strdup(bdrv_get_format_name(bs)); + info->virtual_size = total_sectors * 512; + info->actual_size = bdrv_get_allocated_file_size(bs); + info->has_actual_size = info->actual_size >= 0; + if (bdrv_is_encrypted(bs)) { + info->encrypted = true; + info->has_encrypted = true; + } + if (bdrv_get_info(bs, &bdi) >= 0) { + if (bdi.cluster_size != 0) { + info->cluster_size = bdi.cluster_size; + info->has_cluster_size = true; + } + info->dirty_flag = bdi.is_dirty; + info->has_dirty_flag = true; + } + bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename)); + if (backing_filename[0] != '\0') { + info->backing_filename = g_strdup(backing_filename); + info->has_backing_filename = true; + bdrv_get_full_backing_filename(bs, backing_filename2, + sizeof(backing_filename2)); + + if (strcmp(backing_filename, backing_filename2) != 0) { + info->full_backing_filename = + g_strdup(backing_filename2); + info->has_full_backing_filename = true; + } + + if (bs->backing_format[0]) { + info->backing_filename_format = g_strdup(bs->backing_format); + info->has_backing_filename_format = true; + } + } +} + +static void dump_human_image_info(ImageInfo *info) +{ + char size_buf[128], dsize_buf[128]; + if (!info->has_actual_size) { + snprintf(dsize_buf, sizeof(dsize_buf), "unavailable"); + } else { + get_human_readable_size(dsize_buf, sizeof(dsize_buf), + info->actual_size); + } + get_human_readable_size(size_buf, sizeof(size_buf), info->virtual_size); + printf("image: %s\n" + "file format: %s\n" + "virtual size: %s (%" PRId64 " bytes)\n" + "disk size: %s\n", + info->filename, info->format, size_buf, + info->virtual_size, + dsize_buf); + + if (info->has_encrypted && info->encrypted) { + printf("encrypted: yes\n"); + } + + if (info->has_cluster_size) { + printf("cluster_size: %" PRId64 "\n", info->cluster_size); + } + + if (info->has_dirty_flag && info->dirty_flag) { + printf("cleanly shut down: no\n"); + } + + if (info->has_backing_filename) { + printf("backing file: %s", info->backing_filename); + if (info->has_full_backing_filename) { + printf(" (actual path: %s)", info->full_backing_filename); + } + putchar('\n'); + if (info->has_backing_filename_format) { + printf("backing file format: %s\n", info->backing_filename_format); + } + } +} + +enum {OPTION_OUTPUT = 256}; + +typedef enum OutputFormat { + OFORMAT_JSON, + OFORMAT_HUMAN, +} OutputFormat; + +static int img_info(int argc, char **argv) +{ + int c; + OutputFormat output_format = OFORMAT_HUMAN; + const char *filename, *fmt, *output; + BlockDriverState *bs; + ImageInfo *info; + fmt = NULL; + output = NULL; for(;;) { - c = getopt(argc, argv, "f:h"); + int option_index = 0; + static const struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"format", required_argument, 0, 'f'}, + {"output", required_argument, 0, OPTION_OUTPUT}, + {0, 0, 0, 0} + }; + c = getopt_long(argc, argv, "f:h", + long_options, &option_index); if (c == -1) { break; } @@ -1128,6 +1287,9 @@ static int img_info(int argc, char **argv) case 'f': fmt = optarg; break; + case OPTION_OUTPUT: + output = optarg; + break; } } if (optind >= argc) { @@ -1135,48 +1297,35 @@ static int img_info(int argc, char **argv) } filename = argv[optind++]; - bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING); - if (!bs) { + if (output && !strcmp(output, "json")) { + output_format = OFORMAT_JSON; + } else if (output && !strcmp(output, "human")) { + output_format = OFORMAT_HUMAN; + } else if (output) { + error_report("--output must be used with human or json as argument."); return 1; } - bdrv_get_geometry(bs, &total_sectors); - get_human_readable_size(size_buf, sizeof(size_buf), total_sectors * 512); - allocated_size = bdrv_get_allocated_file_size(bs); - if (allocated_size < 0) { - snprintf(dsize_buf, sizeof(dsize_buf), "unavailable"); - } else { - get_human_readable_size(dsize_buf, sizeof(dsize_buf), - allocated_size); - } - printf("image: %s\n" - "file format: %s\n" - "virtual size: %s (%" PRId64 " bytes)\n" - "disk size: %s\n", - filename, bdrv_get_format_name(bs), size_buf, - (total_sectors * 512), - dsize_buf); - if (bdrv_is_encrypted(bs)) { - printf("encrypted: yes\n"); - } - if (bdrv_get_info(bs, &bdi) >= 0) { - if (bdi.cluster_size != 0) { - printf("cluster_size: %d\n", bdi.cluster_size); - } - if (bdi.is_dirty) { - printf("cleanly shut down: no\n"); - } + + bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, false); + if (!bs) { + return 1; } - bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename)); - if (backing_filename[0] != '\0') { - bdrv_get_full_backing_filename(bs, backing_filename2, - sizeof(backing_filename2)); - printf("backing file: %s", backing_filename); - if (strcmp(backing_filename, backing_filename2) != 0) { - printf(" (actual path: %s)", backing_filename2); - } - putchar('\n'); + + info = g_new0(ImageInfo, 1); + collect_image_info(bs, info, filename, fmt); + + switch (output_format) { + case OFORMAT_HUMAN: + dump_human_image_info(info); + dump_snapshots(bs); + break; + case OFORMAT_JSON: + collect_snapshots(bs, info); + dump_json_image_info(info); + break; } - dump_snapshots(bs); + + qapi_free_ImageInfo(info); bdrv_delete(bs); return 0; } @@ -1248,7 +1397,7 @@ static int img_snapshot(int argc, char **argv) filename = argv[optind++]; /* Open the image */ - bs = bdrv_new_open(filename, NULL, bdrv_oflags); + bs = bdrv_new_open(filename, NULL, bdrv_oflags, true); if (!bs) { return 1; } @@ -1366,7 +1515,7 @@ static int img_rebase(int argc, char **argv) * Ignore the old backing file for unsafe rebase in case we want to correct * the reference to a renamed or moved backing file. */ - bs = bdrv_new_open(filename, fmt, flags); + bs = bdrv_new_open(filename, fmt, flags, true); if (!bs) { return 1; } @@ -1639,7 +1788,7 @@ static int img_resize(int argc, char **argv) n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0); qemu_opts_del(param); - bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); + bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR, true); if (!bs) { ret = -1; goto out; diff --git a/qemu-img.texi b/qemu-img.texi index 6b42e35fe7..8b05f2c428 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -87,7 +87,7 @@ Perform a consistency check on the disk image @var{filename}. If @code{-r} is specified, qemu-img tries to repair any inconsistencies found during the check. @code{-r leaks} repairs only cluster leaks, whereas @code{-r all} fixes all kinds of errors, with a higher risk of choosing the -wrong fix or hiding corruption that has already occured. +wrong fix or hiding corruption that has already occurred. Only the formats @code{qcow2}, @code{qed} and @code{vdi} support consistency checks. @@ -129,12 +129,13 @@ created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, however the path, image format, etc may differ. -@item info [-f @var{fmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} Give information about the disk image @var{filename}. Use it in particular to know the size reserved on disk which can be different from the displayed size. If VM snapshots are stored in the disk image, -they are displayed too. +they are displayed too. The command can output in the format @var{ofmt} +which is either @code{human} or @code{json}. @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename} diff --git a/qemu-options.hx b/qemu-options.hx index 804a2d1739..09c86c4cb0 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -38,7 +38,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " supported accelerators are kvm, xen, tcg (default: tcg)\n" " kernel_irqchip=on|off controls accelerated irqchip support\n" " kvm_shadow_mem=size of KVM shadow MMU\n" - " dump-guest-core=on|off include guest memory in a core dump (default=on)\n", + " dump-guest-core=on|off include guest memory in a core dump (default=on)\n" + " mem-merge=on|off controls memory merge support (default: on)\n", QEMU_ARCH_ALL) STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] @@ -57,6 +58,10 @@ Enables in-kernel irqchip support for the chosen accelerator when available. Defines the size of the KVM shadow MMU. @item dump-guest-core=on|off Include guest memory in a core dump. The default is on. +@item mem-merge=on|off +Enables or disables memory merge support. This feature, when supported by +the host, de-duplicates identical memory pages among VMs instances +(enabled by default). @end table ETEXI @@ -1357,6 +1362,7 @@ Valid values for @var{type} are Not all devices are supported on all targets. Use -net nic,model=? for a list of available devices for your target. +@item -netdev user,id=@var{id}[,@var{option}][,@var{option}][,...] @item -net user[,@var{option}][,@var{option}][,...] Use the user mode network stack which requires no administrator privilege to run. Valid options are: @@ -1365,6 +1371,7 @@ privilege to run. Valid options are: @item vlan=@var{n} Connect user mode stack to VLAN @var{n} (@var{n} = 0 is the default). +@item id=@var{id} @item name=@var{name} Assign symbolic name for use in monitor commands. @@ -1490,6 +1497,7 @@ processed and applied to -net user. Mixing them with the new configuration syntax gives undefined results. Their use for new applications is discouraged as they will be removed from future versions. +@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] @item -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] Connect the host TAP network interface @var{name} to VLAN @var{n}. @@ -1529,6 +1537,7 @@ qemu-system-i386 linux.img \ -net nic -net tap,"helper=/usr/local/libexec/qemu-bridge-helper" @end example +@item -netdev bridge,id=@var{id}[,br=@var{bridge}][,helper=@var{helper}] @item -net bridge[,vlan=@var{n}][,name=@var{name}][,br=@var{bridge}][,helper=@var{helper}] Connect a host TAP network interface to a host bridge device. @@ -1551,6 +1560,7 @@ qemu-system-i386 linux.img -net bridge -net nic,model=virtio qemu-system-i386 linux.img -net bridge,br=qemubr0 -net nic,model=virtio @end example +@item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}] [,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] Connect the VLAN @var{n} to a remote VLAN in another QEMU virtual @@ -1573,6 +1583,7 @@ qemu-system-i386 linux.img \ -net socket,connect=127.0.0.1:1234 @end example +@item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] Create a VLAN @var{n} shared with another QEMU virtual @@ -1624,6 +1635,7 @@ qemu-system-i386 linux.img \ -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 @end example +@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and listening for incoming connections on @var{socketpath}. Use GROUP @var{groupname} diff --git a/qemu-os-win32.h b/qemu-os-win32.h index 753679b194..3b5a35b6c2 100644 --- a/qemu-os-win32.h +++ b/qemu-os-win32.h @@ -68,6 +68,12 @@ /* Declaration of ffs() is missing in MinGW's strings.h. */ int ffs(int i); +/* Missing POSIX functions. Don't use MinGW-w64 macros. */ +#undef gmtime_r +struct tm *gmtime_r(const time_t *timep, struct tm *result); +#undef localtime_r +struct tm *localtime_r(const time_t *timep, struct tm *result); + static inline void os_setup_signal_handling(void) {} static inline void os_daemonize(void) {} static inline void os_setup_post(void) {} diff --git a/qemu-timer.h b/qemu-timer.h index f8af595f13..da7e97cd5a 100644 --- a/qemu-timer.h +++ b/qemu-timer.h @@ -218,7 +218,7 @@ static inline int64_t cpu_get_real_ticks(void) return val; } -#elif defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__) +#elif defined(__sparc__) static inline int64_t cpu_get_real_ticks (void) { @@ -227,6 +227,8 @@ static inline int64_t cpu_get_real_ticks (void) asm volatile("rd %%tick,%0" : "=r"(rval)); return rval; #else + /* We need an %o or %g register for this. For recent enough gcc + there is an "h" constraint for that. Don't bother with that. */ union { uint64_t i64; struct { @@ -234,8 +236,8 @@ static inline int64_t cpu_get_real_ticks (void) uint32_t low; } i32; } rval; - asm volatile("rd %%tick,%1; srlx %1,32,%0" - : "=r"(rval.i32.high), "=r"(rval.i32.low)); + asm volatile("rd %%tick,%%g1; srlx %%g1,32,%0; mov %%g1,%1" + : "=r"(rval.i32.high), "=r"(rval.i32.low) : : "g1"); return rval.i64; #endif } diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index a639c5bff0..53a6f87650 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -28,7 +28,21 @@ if [ -z "$output" ]; then output="$PWD" fi -for arch in x86 powerpc s390; do +# This will pick up non-directories too (eg "Kconfig") but we will +# ignore them in the next loop. +ARCHLIST=$(cd "$linux/arch" && echo *) + +for arch in $ARCHLIST; do + # Discard anything which isn't a KVM-supporting architecture + if ! [ -e "$linux/arch/$arch/include/asm/kvm.h" ]; then + continue + fi + + # Blacklist architectures which have KVM headers but are actually dead + if [ "$arch" = "ia64" ]; then + continue + fi + make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index 025b374367..1542e43619 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -114,9 +114,9 @@ tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, int win = 0; DEBUG_CALL("tcp_respond"); - DEBUG_ARG("tp = %lx", (long)tp); - DEBUG_ARG("ti = %lx", (long)ti); - DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("tp = %p", tp); + DEBUG_ARG("ti = %p", ti); + DEBUG_ARG("m = %p", m); DEBUG_ARG("ack = %u", ack); DEBUG_ARG("seq = %u", seq); DEBUG_ARG("flags = %x", flags); @@ -124,7 +124,7 @@ tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, if (tp) win = sbspace(&tp->t_socket->so_rcv); if (m == NULL) { - if ((m = m_get(tp->t_socket->slirp)) == NULL) + if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) return; tlen = 0; m->m_data += IF_MAXLINKHDR; diff --git a/slirp/tftp.c b/slirp/tftp.c index b78765f3af..1a79c45cfb 100644 --- a/slirp/tftp.c +++ b/slirp/tftp.c @@ -37,6 +37,10 @@ static inline void tftp_session_update(struct tftp_session *spt) static void tftp_session_terminate(struct tftp_session *spt) { + if (spt->fd >= 0) { + close(spt->fd); + spt->fd = -1; + } g_free(spt->filename); spt->slirp = NULL; } @@ -54,7 +58,7 @@ static int tftp_session_allocate(Slirp *slirp, struct tftp_t *tp) /* sessions time out after 5 inactive seconds */ if ((int)(curtime - spt->timestamp) > 5000) { - g_free(spt->filename); + tftp_session_terminate(spt); goto found; } } @@ -64,6 +68,7 @@ static int tftp_session_allocate(Slirp *slirp, struct tftp_t *tp) found: memset(spt, 0, sizeof(*spt)); memcpy(&spt->client_ip, &tp->ip.ip_src, sizeof(spt->client_ip)); + spt->fd = -1; spt->client_port = tp->udp.uh_sport; spt->slirp = slirp; @@ -92,37 +97,36 @@ static int tftp_session_find(Slirp *slirp, struct tftp_t *tp) return -1; } -static int tftp_read_data(struct tftp_session *spt, uint16_t block_nr, +static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, uint8_t *buf, int len) { - int fd; - int bytes_read = 0; + int bytes_read = 0; - fd = open(spt->filename, O_RDONLY | O_BINARY); + if (spt->fd < 0) { + spt->fd = open(spt->filename, O_RDONLY | O_BINARY); + } - if (fd < 0) { - return -1; - } + if (spt->fd < 0) { + return -1; + } - if (len) { - lseek(fd, block_nr * 512, SEEK_SET); + if (len) { + lseek(spt->fd, block_nr * 512, SEEK_SET); - bytes_read = read(fd, buf, len); - } - - close(fd); + bytes_read = read(spt->fd, buf, len); + } - return bytes_read; + return bytes_read; } static int tftp_send_oack(struct tftp_session *spt, - const char *key, uint32_t value, + const char *keys[], uint32_t values[], int nb, struct tftp_t *recv_tp) { struct sockaddr_in saddr, daddr; struct mbuf *m; struct tftp_t *tp; - int n = 0; + int i, n = 0; m = m_get(spt->slirp); @@ -136,10 +140,12 @@ static int tftp_send_oack(struct tftp_session *spt, m->m_data += sizeof(struct udpiphdr); tp->tp_op = htons(TFTP_OACK); - n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", - key) + 1; - n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", - value) + 1; + for (i = 0; i < nb; i++) { + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", + keys[i]) + 1; + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", + values[i]) + 1; + } saddr.sin_addr = recv_tp->ip.ip_dst; saddr.sin_port = recv_tp->udp.uh_dport; @@ -193,23 +199,18 @@ out: tftp_session_terminate(spt); } -static int tftp_send_data(struct tftp_session *spt, - uint16_t block_nr, - struct tftp_t *recv_tp) +static void tftp_send_next_block(struct tftp_session *spt, + struct tftp_t *recv_tp) { struct sockaddr_in saddr, daddr; struct mbuf *m; struct tftp_t *tp; int nobytes; - if (block_nr < 1) { - return -1; - } - m = m_get(spt->slirp); if (!m) { - return -1; + return; } memset(m->m_data, 0, m->m_size); @@ -219,7 +220,7 @@ static int tftp_send_data(struct tftp_session *spt, m->m_data += sizeof(struct udpiphdr); tp->tp_op = htons(TFTP_DATA); - tp->x.tp_data.tp_block_nr = htons(block_nr); + tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); saddr.sin_addr = recv_tp->ip.ip_dst; saddr.sin_port = recv_tp->udp.uh_dport; @@ -227,7 +228,7 @@ static int tftp_send_data(struct tftp_session *spt, daddr.sin_addr = spt->client_ip; daddr.sin_port = spt->client_port; - nobytes = tftp_read_data(spt, block_nr - 1, tp->x.tp_data.tp_buf, 512); + nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, 512); if (nobytes < 0) { m_free(m); @@ -236,7 +237,7 @@ static int tftp_send_data(struct tftp_session *spt, tftp_send_error(spt, 1, "File not found", tp); - return -1; + return; } m->m_len = sizeof(struct tftp_t) - (512 - nobytes) - @@ -251,7 +252,7 @@ static int tftp_send_data(struct tftp_session *spt, tftp_session_terminate(spt); } - return 0; + spt->block_nr++; } static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen) @@ -260,6 +261,9 @@ static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen) int s, k; size_t prefix_len; char *req_fname; + const char *option_name[2]; + uint32_t option_value[2]; + int nb_options = 0; /* check if a session already exists and if so terminate it */ s = tftp_session_find(slirp, tp); @@ -337,7 +341,7 @@ static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen) return; } - while (k < pktlen) { + while (k < pktlen && nb_options < ARRAY_SIZE(option_name)) { const char *key, *value; key = &tp->x.tp_buf[k]; @@ -364,12 +368,32 @@ static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen) } } - tftp_send_oack(spt, "tsize", tsize, tp); - return; + option_name[nb_options] = "tsize"; + option_value[nb_options] = tsize; + nb_options++; + } else if (strcasecmp(key, "blksize") == 0) { + int blksize = atoi(value); + + /* If blksize option is bigger than what we will + * emit, accept the option with our packet size. + * Otherwise, simply do as we didn't see the option. + */ + if (blksize >= 512) { + option_name[nb_options] = "blksize"; + option_value[nb_options] = 512; + nb_options++; + } } } - tftp_send_data(spt, 1, tp); + if (nb_options > 0) { + assert(nb_options <= ARRAY_SIZE(option_name)); + tftp_send_oack(spt, option_name, option_value, nb_options, tp); + return; + } + + spt->block_nr = 0; + tftp_send_next_block(spt, tp); } static void tftp_handle_ack(Slirp *slirp, struct tftp_t *tp, int pktlen) @@ -382,11 +406,7 @@ static void tftp_handle_ack(Slirp *slirp, struct tftp_t *tp, int pktlen) return; } - if (tftp_send_data(&slirp->tftp_sessions[s], - ntohs(tp->x.tp_data.tp_block_nr) + 1, - tp) < 0) { - return; - } + tftp_send_next_block(&slirp->tftp_sessions[s], tp); } static void tftp_handle_error(Slirp *slirp, struct tftp_t *tp, int pktlen) diff --git a/slirp/tftp.h b/slirp/tftp.h index 72e5e91bef..51704e4874 100644 --- a/slirp/tftp.h +++ b/slirp/tftp.h @@ -33,9 +33,11 @@ struct tftp_t { struct tftp_session { Slirp *slirp; char *filename; + int fd; struct in_addr client_ip; uint16_t client_port; + uint32_t block_nr; int timestamp; }; diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 12de6a3fb6..4a9011a2b6 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -426,27 +426,15 @@ static ExitStatus gen_bcond_internal(DisasContext *ctx, TCGCond cond, return EXIT_GOTO_TB; } else { - int lab_over = gen_new_label(); - - /* ??? Consider using either - movi pc, next - addi tmp, pc, disp - movcond pc, cond, 0, tmp, pc - or - setcond tmp, cond, 0 - movi pc, next - neg tmp, tmp - andi tmp, tmp, disp - add pc, pc, tmp - The current diamond subgraph surely isn't efficient. */ + TCGv_i64 z = tcg_const_i64(0); + TCGv_i64 d = tcg_const_i64(dest); + TCGv_i64 p = tcg_const_i64(ctx->pc); - tcg_gen_brcondi_i64(cond, cmp, 0, lab_true); - tcg_gen_movi_i64(cpu_pc, ctx->pc); - tcg_gen_br(lab_over); - gen_set_label(lab_true); - tcg_gen_movi_i64(cpu_pc, dest); - gen_set_label(lab_over); + tcg_gen_movcond_i64(cond, cpu_pc, cmp, z, d, p); + tcg_temp_free_i64(z); + tcg_temp_free_i64(d); + tcg_temp_free_i64(p); return EXIT_PC_UPDATED; } } @@ -521,61 +509,67 @@ static ExitStatus gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, static void gen_cmov(TCGCond cond, int ra, int rb, int rc, int islit, uint8_t lit, int mask) { - TCGCond inv_cond = tcg_invert_cond(cond); - int l1; + TCGv_i64 c1, z, v1; - if (unlikely(rc == 31)) + if (unlikely(rc == 31)) { return; + } - l1 = gen_new_label(); - - if (ra != 31) { - if (mask) { - TCGv tmp = tcg_temp_new(); - tcg_gen_andi_i64(tmp, cpu_ir[ra], 1); - tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1); - tcg_temp_free(tmp); - } else - tcg_gen_brcondi_i64(inv_cond, cpu_ir[ra], 0, l1); - } else { + if (ra == 31) { /* Very uncommon case - Do not bother to optimize. */ - TCGv tmp = tcg_const_i64(0); - tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1); - tcg_temp_free(tmp); + c1 = tcg_const_i64(0); + } else if (mask) { + c1 = tcg_const_i64(1); + tcg_gen_and_i64(c1, c1, cpu_ir[ra]); + } else { + c1 = cpu_ir[ra]; } + if (islit) { + v1 = tcg_const_i64(lit); + } else { + v1 = cpu_ir[rb]; + } + z = tcg_const_i64(0); - if (islit) - tcg_gen_movi_i64(cpu_ir[rc], lit); - else - tcg_gen_mov_i64(cpu_ir[rc], cpu_ir[rb]); - gen_set_label(l1); + tcg_gen_movcond_i64(cond, cpu_ir[rc], c1, z, v1, cpu_ir[rc]); + + tcg_temp_free_i64(z); + if (ra == 31 || mask) { + tcg_temp_free_i64(c1); + } + if (islit) { + tcg_temp_free_i64(v1); + } } static void gen_fcmov(TCGCond cond, int ra, int rb, int rc) { - TCGv cmp_tmp; - int l1; + TCGv_i64 c1, z, v1; if (unlikely(rc == 31)) { return; } - cmp_tmp = tcg_temp_new(); + c1 = tcg_temp_new_i64(); if (unlikely(ra == 31)) { - tcg_gen_movi_i64(cmp_tmp, 0); + tcg_gen_movi_i64(c1, 0); + } else { + gen_fold_mzero(cond, c1, cpu_fir[ra]); + } + if (rb == 31) { + v1 = tcg_const_i64(0); } else { - gen_fold_mzero(cond, cmp_tmp, cpu_fir[ra]); + v1 = cpu_fir[rb]; } + z = tcg_const_i64(0); - l1 = gen_new_label(); - tcg_gen_brcondi_i64(tcg_invert_cond(cond), cmp_tmp, 0, l1); - tcg_temp_free(cmp_tmp); + tcg_gen_movcond_i64(cond, cpu_fir[rc], c1, z, v1, cpu_fir[rc]); - if (rb != 31) - tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]); - else - tcg_gen_movi_i64(cpu_fir[rc], 0); - gen_set_label(l1); + tcg_temp_free_i64(z); + tcg_temp_free_i64(c1); + if (rb == 31) { + tcg_temp_free_i64(v1); + } } #define QUAL_RM_N 0x080 /* Round mode nearest even */ diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs index ca20f21443..3eeeeac8b8 100644 --- a/target-mips/Makefile.objs +++ b/target-mips/Makefile.objs @@ -1,2 +1,2 @@ -obj-y += translate.o op_helper.o helper.o cpu.o +obj-y += translate.o op_helper.o lmi_helper.o helper.o cpu.o obj-$(CONFIG_SOFTMMU) += machine.o diff --git a/target-mips/helper.h b/target-mips/helper.h index 109ac37fd6..f35ed78c16 100644 --- a/target-mips/helper.h +++ b/target-mips/helper.h @@ -303,4 +303,63 @@ DEF_HELPER_1(rdhwr_ccres, tl, env) DEF_HELPER_2(pmon, void, env, int) DEF_HELPER_1(wait, void, env) +/* Loongson multimedia functions. */ +DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(paddush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(paddh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(paddw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(paddb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psubush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psubh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psubw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psubb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(packushb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pavgb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pminub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(psllw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psllh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psraw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(psrah, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_1(biadd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) +DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) + #include "def-helper.h" diff --git a/target-mips/lmi_helper.c b/target-mips/lmi_helper.c new file mode 100644 index 0000000000..1b24353519 --- /dev/null +++ b/target-mips/lmi_helper.c @@ -0,0 +1,744 @@ +/* + * Loongson Multimedia Instruction emulation helpers for QEMU. + * + * Copyright (c) 2011 Richard Henderson <rth@twiddle.net> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "cpu.h" +#include "helper.h" + +/* If the byte ordering doesn't matter, i.e. all columns are treated + identically, then this union can be used directly. If byte ordering + does matter, we generally ignore dumping to memory. */ +typedef union { + uint8_t ub[8]; + int8_t sb[8]; + uint16_t uh[4]; + int16_t sh[4]; + uint32_t uw[2]; + int32_t sw[2]; + uint64_t d; +} LMIValue; + +/* Some byte ordering issues can be mitigated by XORing in the following. */ +#ifdef HOST_WORDS_BIGENDIAN +# define BYTE_ORDER_XOR(N) N +#else +# define BYTE_ORDER_XOR(N) 0 +#endif + +#define SATSB(x) (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x) +#define SATUB(x) (x > 0xff ? 0xff : x) + +#define SATSH(x) (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x) +#define SATUH(x) (x > 0xffff ? 0xffff : x) + +#define SATSW(x) \ + (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x) +#define SATUW(x) (x > 0xffffffffull ? 0xffffffffull : x) + +uint64_t helper_paddsb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + int r = vs.sb[i] + vt.sb[i]; + vs.sb[i] = SATSB(r); + } + return vs.d; +} + +uint64_t helper_paddusb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + int r = vs.ub[i] + vt.ub[i]; + vs.ub[i] = SATUB(r); + } + return vs.d; +} + +uint64_t helper_paddsh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + int r = vs.sh[i] + vt.sh[i]; + vs.sh[i] = SATSH(r); + } + return vs.d; +} + +uint64_t helper_paddush(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + int r = vs.uh[i] + vt.uh[i]; + vs.uh[i] = SATUH(r); + } + return vs.d; +} + +uint64_t helper_paddb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + vs.ub[i] += vt.ub[i]; + } + return vs.d; +} + +uint64_t helper_paddh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + vs.uh[i] += vt.uh[i]; + } + return vs.d; +} + +uint64_t helper_paddw(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 2; ++i) { + vs.uw[i] += vt.uw[i]; + } + return vs.d; +} + +uint64_t helper_psubsb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + int r = vs.sb[i] - vt.sb[i]; + vs.sb[i] = SATSB(r); + } + return vs.d; +} + +uint64_t helper_psubusb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + int r = vs.ub[i] - vt.ub[i]; + vs.ub[i] = SATUB(r); + } + return vs.d; +} + +uint64_t helper_psubsh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + int r = vs.sh[i] - vt.sh[i]; + vs.sh[i] = SATSH(r); + } + return vs.d; +} + +uint64_t helper_psubush(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + int r = vs.uh[i] - vt.uh[i]; + vs.uh[i] = SATUH(r); + } + return vs.d; +} + +uint64_t helper_psubb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + vs.ub[i] -= vt.ub[i]; + } + return vs.d; +} + +uint64_t helper_psubh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + vs.uh[i] -= vt.uh[i]; + } + return vs.d; +} + +uint64_t helper_psubw(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned int i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 2; ++i) { + vs.uw[i] -= vt.uw[i]; + } + return vs.d; +} + +uint64_t helper_pshufh(uint64_t fs, uint64_t ft) +{ + unsigned host = BYTE_ORDER_XOR(3); + LMIValue vd, vs; + unsigned i; + + vs.d = fs; + vd.d = 0; + for (i = 0; i < 4; i++, ft >>= 2) { + vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host]; + } + return vd.d; +} + +uint64_t helper_packsswh(uint64_t fs, uint64_t ft) +{ + uint64_t fd = 0; + int64_t tmp; + + tmp = (int32_t)(fs >> 0); + tmp = SATSH(tmp); + fd |= (tmp & 0xffff) << 0; + + tmp = (int32_t)(fs >> 32); + tmp = SATSH(tmp); + fd |= (tmp & 0xffff) << 16; + + tmp = (int32_t)(ft >> 0); + tmp = SATSH(tmp); + fd |= (tmp & 0xffff) << 32; + + tmp = (int32_t)(ft >> 32); + tmp = SATSH(tmp); + fd |= (tmp & 0xffff) << 48; + + return fd; +} + +uint64_t helper_packsshb(uint64_t fs, uint64_t ft) +{ + uint64_t fd = 0; + unsigned int i; + + for (i = 0; i < 4; ++i) { + int16_t tmp = fs >> (i * 16); + tmp = SATSB(tmp); + fd |= (uint64_t)(tmp & 0xff) << (i * 8); + } + for (i = 0; i < 4; ++i) { + int16_t tmp = ft >> (i * 16); + tmp = SATSB(tmp); + fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); + } + + return fd; +} + +uint64_t helper_packushb(uint64_t fs, uint64_t ft) +{ + uint64_t fd = 0; + unsigned int i; + + for (i = 0; i < 4; ++i) { + int16_t tmp = fs >> (i * 16); + tmp = SATUB(tmp); + fd |= (uint64_t)(tmp & 0xff) << (i * 8); + } + for (i = 0; i < 4; ++i) { + int16_t tmp = ft >> (i * 16); + tmp = SATUB(tmp); + fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); + } + + return fd; +} + +uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft) +{ + return (fs & 0xffffffff) | (ft << 32); +} + +uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft) +{ + return (fs >> 32) | (ft & ~0xffffffffull); +} + +uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft) +{ + unsigned host = BYTE_ORDER_XOR(3); + LMIValue vd, vs, vt; + + vs.d = fs; + vt.d = ft; + vd.uh[0 ^ host] = vs.uh[0 ^ host]; + vd.uh[1 ^ host] = vt.uh[0 ^ host]; + vd.uh[2 ^ host] = vs.uh[1 ^ host]; + vd.uh[3 ^ host] = vt.uh[1 ^ host]; + + return vd.d; +} + +uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft) +{ + unsigned host = BYTE_ORDER_XOR(3); + LMIValue vd, vs, vt; + + vs.d = fs; + vt.d = ft; + vd.uh[0 ^ host] = vs.uh[2 ^ host]; + vd.uh[1 ^ host] = vt.uh[2 ^ host]; + vd.uh[2 ^ host] = vs.uh[3 ^ host]; + vd.uh[3 ^ host] = vt.uh[3 ^ host]; + + return vd.d; +} + +uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft) +{ + unsigned host = BYTE_ORDER_XOR(7); + LMIValue vd, vs, vt; + + vs.d = fs; + vt.d = ft; + vd.ub[0 ^ host] = vs.ub[0 ^ host]; + vd.ub[1 ^ host] = vt.ub[0 ^ host]; + vd.ub[2 ^ host] = vs.ub[1 ^ host]; + vd.ub[3 ^ host] = vt.ub[1 ^ host]; + vd.ub[4 ^ host] = vs.ub[2 ^ host]; + vd.ub[5 ^ host] = vt.ub[2 ^ host]; + vd.ub[6 ^ host] = vs.ub[3 ^ host]; + vd.ub[7 ^ host] = vt.ub[3 ^ host]; + + return vd.d; +} + +uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft) +{ + unsigned host = BYTE_ORDER_XOR(7); + LMIValue vd, vs, vt; + + vs.d = fs; + vt.d = ft; + vd.ub[0 ^ host] = vs.ub[4 ^ host]; + vd.ub[1 ^ host] = vt.ub[4 ^ host]; + vd.ub[2 ^ host] = vs.ub[5 ^ host]; + vd.ub[3 ^ host] = vt.ub[5 ^ host]; + vd.ub[4 ^ host] = vs.ub[6 ^ host]; + vd.ub[5 ^ host] = vt.ub[6 ^ host]; + vd.ub[6 ^ host] = vs.ub[7 ^ host]; + vd.ub[7 ^ host] = vt.ub[7 ^ host]; + + return vd.d; +} + +uint64_t helper_pavgh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1; + } + return vs.d; +} + +uint64_t helper_pavgb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; i++) { + vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1; + } + return vs.d; +} + +uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]); + } + return vs.d; +} + +uint64_t helper_pminsh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]); + } + return vs.d; +} + +uint64_t helper_pmaxub(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]); + } + return vs.d; +} + +uint64_t helper_pminub(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]); + } + return vs.d; +} + +uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 2; i++) { + vs.uw[i] = -(vs.uw[i] == vt.uw[i]); + } + return vs.d; +} + +uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 2; i++) { + vs.uw[i] = -(vs.uw[i] > vt.uw[i]); + } + return vs.d; +} + +uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.uh[i] = -(vs.uh[i] == vt.uh[i]); + } + return vs.d; +} + +uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; i++) { + vs.uh[i] = -(vs.uh[i] > vt.uh[i]); + } + return vs.d; +} + +uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; i++) { + vs.ub[i] = -(vs.ub[i] == vt.ub[i]); + } + return vs.d; +} + +uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; i++) { + vs.ub[i] = -(vs.ub[i] > vt.ub[i]); + } + return vs.d; +} + +uint64_t helper_psllw(uint64_t fs, uint64_t ft) +{ + LMIValue vs; + unsigned i; + + ft &= 0x7f; + if (ft > 31) { + return 0; + } + vs.d = fs; + for (i = 0; i < 2; ++i) { + vs.uw[i] <<= ft; + } + return vs.d; +} + +uint64_t helper_psrlw(uint64_t fs, uint64_t ft) +{ + LMIValue vs; + unsigned i; + + ft &= 0x7f; + if (ft > 31) { + return 0; + } + vs.d = fs; + for (i = 0; i < 2; ++i) { + vs.uw[i] >>= ft; + } + return vs.d; +} + +uint64_t helper_psraw(uint64_t fs, uint64_t ft) +{ + LMIValue vs; + unsigned i; + + ft &= 0x7f; + if (ft > 31) { + ft = 31; + } + vs.d = fs; + for (i = 0; i < 2; ++i) { + vs.sw[i] >>= ft; + } + return vs.d; +} + +uint64_t helper_psllh(uint64_t fs, uint64_t ft) +{ + LMIValue vs; + unsigned i; + + ft &= 0x7f; + if (ft > 15) { + return 0; + } + vs.d = fs; + for (i = 0; i < 4; ++i) { + vs.uh[i] <<= ft; + } + return vs.d; +} + +uint64_t helper_psrlh(uint64_t fs, uint64_t ft) +{ + LMIValue vs; + unsigned i; + + ft &= 0x7f; + if (ft > 15) { + return 0; + } + vs.d = fs; + for (i = 0; i < 4; ++i) { + vs.uh[i] >>= ft; + } + return vs.d; +} + +uint64_t helper_psrah(uint64_t fs, uint64_t ft) +{ + LMIValue vs; + unsigned i; + + ft &= 0x7f; + if (ft > 15) { + ft = 15; + } + vs.d = fs; + for (i = 0; i < 4; ++i) { + vs.sh[i] >>= ft; + } + return vs.d; +} + +uint64_t helper_pmullh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + vs.sh[i] *= vt.sh[i]; + } + return vs.d; +} + +uint64_t helper_pmulhh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + int32_t r = vs.sh[i] * vt.sh[i]; + vs.sh[i] = r >> 16; + } + return vs.d; +} + +uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 4; ++i) { + uint32_t r = vs.uh[i] * vt.uh[i]; + vs.uh[i] = r >> 16; + } + return vs.d; +} + +uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft) +{ + unsigned host = BYTE_ORDER_XOR(3); + LMIValue vs, vt; + uint32_t p0, p1; + + vs.d = fs; + vt.d = ft; + p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host]; + p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host]; + p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host]; + p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host]; + + return ((uint64_t)p1 << 32) | p0; +} + +uint64_t helper_pasubub(uint64_t fs, uint64_t ft) +{ + LMIValue vs, vt; + unsigned i; + + vs.d = fs; + vt.d = ft; + for (i = 0; i < 8; ++i) { + int r = vs.ub[i] - vt.ub[i]; + vs.ub[i] = (r < 0 ? -r : r); + } + return vs.d; +} + +uint64_t helper_biadd(uint64_t fs) +{ + unsigned i, fd; + + for (i = fd = 0; i < 8; ++i) { + fd += (fs >> (i * 8)) & 0xff; + } + return fd & 0xffff; +} + +uint64_t helper_pmovmskb(uint64_t fs) +{ + unsigned fd = 0; + + fd |= ((fs >> 7) & 1) << 0; + fd |= ((fs >> 15) & 1) << 1; + fd |= ((fs >> 23) & 1) << 2; + fd |= ((fs >> 31) & 1) << 3; + fd |= ((fs >> 39) & 1) << 4; + fd |= ((fs >> 47) & 1) << 5; + fd |= ((fs >> 55) & 1) << 6; + fd |= ((fs >> 63) & 1) << 7; + + return fd & 0xff; +} diff --git a/target-mips/translate.c b/target-mips/translate.c index 52eeb2bf79..fa79d4945b 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -28,7 +28,7 @@ #define GEN_HELPER 1 #include "helper.h" -//#define MIPS_DEBUG_DISAS +#define MIPS_DEBUG_DISAS 0 //#define MIPS_DEBUG_SIGN_EXTENSIONS /* MIPS major opcodes */ @@ -446,6 +446,103 @@ enum { OPC_BC2 = (0x08 << 21) | OPC_CP2, }; +#define MASK_LMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) + +enum { + OPC_PADDSH = (24 << 21) | (0x00) | OPC_CP2, + OPC_PADDUSH = (25 << 21) | (0x00) | OPC_CP2, + OPC_PADDH = (26 << 21) | (0x00) | OPC_CP2, + OPC_PADDW = (27 << 21) | (0x00) | OPC_CP2, + OPC_PADDSB = (28 << 21) | (0x00) | OPC_CP2, + OPC_PADDUSB = (29 << 21) | (0x00) | OPC_CP2, + OPC_PADDB = (30 << 21) | (0x00) | OPC_CP2, + OPC_PADDD = (31 << 21) | (0x00) | OPC_CP2, + + OPC_PSUBSH = (24 << 21) | (0x01) | OPC_CP2, + OPC_PSUBUSH = (25 << 21) | (0x01) | OPC_CP2, + OPC_PSUBH = (26 << 21) | (0x01) | OPC_CP2, + OPC_PSUBW = (27 << 21) | (0x01) | OPC_CP2, + OPC_PSUBSB = (28 << 21) | (0x01) | OPC_CP2, + OPC_PSUBUSB = (29 << 21) | (0x01) | OPC_CP2, + OPC_PSUBB = (30 << 21) | (0x01) | OPC_CP2, + OPC_PSUBD = (31 << 21) | (0x01) | OPC_CP2, + + OPC_PSHUFH = (24 << 21) | (0x02) | OPC_CP2, + OPC_PACKSSWH = (25 << 21) | (0x02) | OPC_CP2, + OPC_PACKSSHB = (26 << 21) | (0x02) | OPC_CP2, + OPC_PACKUSHB = (27 << 21) | (0x02) | OPC_CP2, + OPC_XOR_CP2 = (28 << 21) | (0x02) | OPC_CP2, + OPC_NOR_CP2 = (29 << 21) | (0x02) | OPC_CP2, + OPC_AND_CP2 = (30 << 21) | (0x02) | OPC_CP2, + OPC_PANDN = (31 << 21) | (0x02) | OPC_CP2, + + OPC_PUNPCKLHW = (24 << 21) | (0x03) | OPC_CP2, + OPC_PUNPCKHHW = (25 << 21) | (0x03) | OPC_CP2, + OPC_PUNPCKLBH = (26 << 21) | (0x03) | OPC_CP2, + OPC_PUNPCKHBH = (27 << 21) | (0x03) | OPC_CP2, + OPC_PINSRH_0 = (28 << 21) | (0x03) | OPC_CP2, + OPC_PINSRH_1 = (29 << 21) | (0x03) | OPC_CP2, + OPC_PINSRH_2 = (30 << 21) | (0x03) | OPC_CP2, + OPC_PINSRH_3 = (31 << 21) | (0x03) | OPC_CP2, + + OPC_PAVGH = (24 << 21) | (0x08) | OPC_CP2, + OPC_PAVGB = (25 << 21) | (0x08) | OPC_CP2, + OPC_PMAXSH = (26 << 21) | (0x08) | OPC_CP2, + OPC_PMINSH = (27 << 21) | (0x08) | OPC_CP2, + OPC_PMAXUB = (28 << 21) | (0x08) | OPC_CP2, + OPC_PMINUB = (29 << 21) | (0x08) | OPC_CP2, + + OPC_PCMPEQW = (24 << 21) | (0x09) | OPC_CP2, + OPC_PCMPGTW = (25 << 21) | (0x09) | OPC_CP2, + OPC_PCMPEQH = (26 << 21) | (0x09) | OPC_CP2, + OPC_PCMPGTH = (27 << 21) | (0x09) | OPC_CP2, + OPC_PCMPEQB = (28 << 21) | (0x09) | OPC_CP2, + OPC_PCMPGTB = (29 << 21) | (0x09) | OPC_CP2, + + OPC_PSLLW = (24 << 21) | (0x0A) | OPC_CP2, + OPC_PSLLH = (25 << 21) | (0x0A) | OPC_CP2, + OPC_PMULLH = (26 << 21) | (0x0A) | OPC_CP2, + OPC_PMULHH = (27 << 21) | (0x0A) | OPC_CP2, + OPC_PMULUW = (28 << 21) | (0x0A) | OPC_CP2, + OPC_PMULHUH = (29 << 21) | (0x0A) | OPC_CP2, + + OPC_PSRLW = (24 << 21) | (0x0B) | OPC_CP2, + OPC_PSRLH = (25 << 21) | (0x0B) | OPC_CP2, + OPC_PSRAW = (26 << 21) | (0x0B) | OPC_CP2, + OPC_PSRAH = (27 << 21) | (0x0B) | OPC_CP2, + OPC_PUNPCKLWD = (28 << 21) | (0x0B) | OPC_CP2, + OPC_PUNPCKHWD = (29 << 21) | (0x0B) | OPC_CP2, + + OPC_ADDU_CP2 = (24 << 21) | (0x0C) | OPC_CP2, + OPC_OR_CP2 = (25 << 21) | (0x0C) | OPC_CP2, + OPC_ADD_CP2 = (26 << 21) | (0x0C) | OPC_CP2, + OPC_DADD_CP2 = (27 << 21) | (0x0C) | OPC_CP2, + OPC_SEQU_CP2 = (28 << 21) | (0x0C) | OPC_CP2, + OPC_SEQ_CP2 = (29 << 21) | (0x0C) | OPC_CP2, + + OPC_SUBU_CP2 = (24 << 21) | (0x0D) | OPC_CP2, + OPC_PASUBUB = (25 << 21) | (0x0D) | OPC_CP2, + OPC_SUB_CP2 = (26 << 21) | (0x0D) | OPC_CP2, + OPC_DSUB_CP2 = (27 << 21) | (0x0D) | OPC_CP2, + OPC_SLTU_CP2 = (28 << 21) | (0x0D) | OPC_CP2, + OPC_SLT_CP2 = (29 << 21) | (0x0D) | OPC_CP2, + + OPC_SLL_CP2 = (24 << 21) | (0x0E) | OPC_CP2, + OPC_DSLL_CP2 = (25 << 21) | (0x0E) | OPC_CP2, + OPC_PEXTRH = (26 << 21) | (0x0E) | OPC_CP2, + OPC_PMADDHW = (27 << 21) | (0x0E) | OPC_CP2, + OPC_SLEU_CP2 = (28 << 21) | (0x0E) | OPC_CP2, + OPC_SLE_CP2 = (29 << 21) | (0x0E) | OPC_CP2, + + OPC_SRL_CP2 = (24 << 21) | (0x0F) | OPC_CP2, + OPC_DSRL_CP2 = (25 << 21) | (0x0F) | OPC_CP2, + OPC_SRA_CP2 = (26 << 21) | (0x0F) | OPC_CP2, + OPC_DSRA_CP2 = (27 << 21) | (0x0F) | OPC_CP2, + OPC_BIADD = (28 << 21) | (0x0F) | OPC_CP2, + OPC_PMOVMSKB = (29 << 21) | (0x0F) | OPC_CP2, +}; + + #define MASK_CP3(op) MASK_OP_MAJOR(op) | (op & 0x3F) enum { @@ -566,22 +663,25 @@ static const char *fregnames[] = "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", }; -#ifdef MIPS_DEBUG_DISAS -#define MIPS_DEBUG(fmt, ...) \ - qemu_log_mask(CPU_LOG_TB_IN_ASM, \ - TARGET_FMT_lx ": %08x " fmt "\n", \ - ctx->pc, ctx->opcode , ## __VA_ARGS__) -#define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__) -#else -#define MIPS_DEBUG(fmt, ...) do { } while(0) -#define LOG_DISAS(...) do { } while (0) -#endif +#define MIPS_DEBUG(fmt, ...) \ + do { \ + if (MIPS_DEBUG_DISAS) { \ + qemu_log_mask(CPU_LOG_TB_IN_ASM, \ + TARGET_FMT_lx ": %08x " fmt "\n", \ + ctx->pc, ctx->opcode , ## __VA_ARGS__); \ + } \ + } while (0) + +#define LOG_DISAS(...) \ + do { \ + if (MIPS_DEBUG_DISAS) { \ + qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__); \ + } \ + } while (0) #define MIPS_INVAL(op) \ -do { \ MIPS_DEBUG("Invalid %s %03x %03x %03x", op, ctx->opcode >> 26, \ - ctx->opcode & 0x3F, ((ctx->opcode >> 16) & 0x1F)); \ -} while (0) + ctx->opcode & 0x3F, ((ctx->opcode >> 16) & 0x1F)) /* General purpose registers moves. */ static inline void gen_load_gpr (TCGv t, int reg) @@ -1431,7 +1531,8 @@ static void gen_arith_imm (CPUMIPSState *env, DisasContext *ctx, uint32_t opc, } /* Logic with immediate operand */ -static void gen_logic_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm) +static void gen_logic_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc, + int rt, int rs, int16_t imm) { target_ulong uimm; const char *opn = "imm logic"; @@ -1474,7 +1575,8 @@ static void gen_logic_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int1 } /* Set on less than with immediate operand */ -static void gen_slt_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm) +static void gen_slt_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc, + int rt, int rs, int16_t imm) { target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */ const char *opn = "imm arith"; @@ -1775,7 +1877,8 @@ static void gen_arith (CPUMIPSState *env, DisasContext *ctx, uint32_t opc, } /* Conditional move */ -static void gen_cond_move (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt) +static void gen_cond_move(CPUMIPSState *env, DisasContext *ctx, uint32_t opc, + int rd, int rs, int rt) { const char *opn = "cond move"; int l1; @@ -1813,7 +1916,8 @@ static void gen_cond_move (CPUMIPSState *env, uint32_t opc, int rd, int rs, int } /* Logic */ -static void gen_logic (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt) +static void gen_logic(CPUMIPSState *env, DisasContext *ctx, uint32_t opc, + int rd, int rs, int rt) { const char *opn = "logic"; @@ -1874,7 +1978,8 @@ static void gen_logic (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt) } /* Set on lower than */ -static void gen_slt (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt) +static void gen_slt(CPUMIPSState *env, DisasContext *ctx, uint32_t opc, + int rd, int rs, int rt) { const char *opn = "slt"; TCGv t0, t1; @@ -2380,8 +2485,8 @@ static void gen_cl (DisasContext *ctx, uint32_t opc, } /* Godson integer instructions */ -static void gen_loongson_integer (DisasContext *ctx, uint32_t opc, - int rd, int rs, int rt) +static void gen_loongson_integer(DisasContext *ctx, uint32_t opc, + int rd, int rs, int rt) { const char *opn = "loongson"; TCGv t0, t1; @@ -2594,6 +2699,278 @@ static void gen_loongson_integer (DisasContext *ctx, uint32_t opc, tcg_temp_free(t1); } +/* Loongson multimedia instructions */ +static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) +{ + const char *opn = "loongson_cp2"; + uint32_t opc, shift_max; + TCGv_i64 t0, t1; + + opc = MASK_LMI(ctx->opcode); + switch (opc) { + case OPC_ADD_CP2: + case OPC_SUB_CP2: + case OPC_DADD_CP2: + case OPC_DSUB_CP2: + t0 = tcg_temp_local_new_i64(); + t1 = tcg_temp_local_new_i64(); + break; + default: + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + break; + } + + gen_load_fpr64(ctx, t0, rs); + gen_load_fpr64(ctx, t1, rt); + +#define LMI_HELPER(UP, LO) \ + case OPC_##UP: gen_helper_##LO(t0, t0, t1); opn = #LO; break +#define LMI_HELPER_1(UP, LO) \ + case OPC_##UP: gen_helper_##LO(t0, t0); opn = #LO; break +#define LMI_DIRECT(UP, LO, OP) \ + case OPC_##UP: tcg_gen_##OP##_i64(t0, t0, t1); opn = #LO; break + + switch (opc) { + LMI_HELPER(PADDSH, paddsh); + LMI_HELPER(PADDUSH, paddush); + LMI_HELPER(PADDH, paddh); + LMI_HELPER(PADDW, paddw); + LMI_HELPER(PADDSB, paddsb); + LMI_HELPER(PADDUSB, paddusb); + LMI_HELPER(PADDB, paddb); + + LMI_HELPER(PSUBSH, psubsh); + LMI_HELPER(PSUBUSH, psubush); + LMI_HELPER(PSUBH, psubh); + LMI_HELPER(PSUBW, psubw); + LMI_HELPER(PSUBSB, psubsb); + LMI_HELPER(PSUBUSB, psubusb); + LMI_HELPER(PSUBB, psubb); + + LMI_HELPER(PSHUFH, pshufh); + LMI_HELPER(PACKSSWH, packsswh); + LMI_HELPER(PACKSSHB, packsshb); + LMI_HELPER(PACKUSHB, packushb); + + LMI_HELPER(PUNPCKLHW, punpcklhw); + LMI_HELPER(PUNPCKHHW, punpckhhw); + LMI_HELPER(PUNPCKLBH, punpcklbh); + LMI_HELPER(PUNPCKHBH, punpckhbh); + LMI_HELPER(PUNPCKLWD, punpcklwd); + LMI_HELPER(PUNPCKHWD, punpckhwd); + + LMI_HELPER(PAVGH, pavgh); + LMI_HELPER(PAVGB, pavgb); + LMI_HELPER(PMAXSH, pmaxsh); + LMI_HELPER(PMINSH, pminsh); + LMI_HELPER(PMAXUB, pmaxub); + LMI_HELPER(PMINUB, pminub); + + LMI_HELPER(PCMPEQW, pcmpeqw); + LMI_HELPER(PCMPGTW, pcmpgtw); + LMI_HELPER(PCMPEQH, pcmpeqh); + LMI_HELPER(PCMPGTH, pcmpgth); + LMI_HELPER(PCMPEQB, pcmpeqb); + LMI_HELPER(PCMPGTB, pcmpgtb); + + LMI_HELPER(PSLLW, psllw); + LMI_HELPER(PSLLH, psllh); + LMI_HELPER(PSRLW, psrlw); + LMI_HELPER(PSRLH, psrlh); + LMI_HELPER(PSRAW, psraw); + LMI_HELPER(PSRAH, psrah); + + LMI_HELPER(PMULLH, pmullh); + LMI_HELPER(PMULHH, pmulhh); + LMI_HELPER(PMULHUH, pmulhuh); + LMI_HELPER(PMADDHW, pmaddhw); + + LMI_HELPER(PASUBUB, pasubub); + LMI_HELPER_1(BIADD, biadd); + LMI_HELPER_1(PMOVMSKB, pmovmskb); + + LMI_DIRECT(PADDD, paddd, add); + LMI_DIRECT(PSUBD, psubd, sub); + LMI_DIRECT(XOR_CP2, xor, xor); + LMI_DIRECT(NOR_CP2, nor, nor); + LMI_DIRECT(AND_CP2, and, and); + LMI_DIRECT(PANDN, pandn, andc); + LMI_DIRECT(OR, or, or); + + case OPC_PINSRH_0: + tcg_gen_deposit_i64(t0, t0, t1, 0, 16); + opn = "pinsrh_0"; + break; + case OPC_PINSRH_1: + tcg_gen_deposit_i64(t0, t0, t1, 16, 16); + opn = "pinsrh_1"; + break; + case OPC_PINSRH_2: + tcg_gen_deposit_i64(t0, t0, t1, 32, 16); + opn = "pinsrh_2"; + break; + case OPC_PINSRH_3: + tcg_gen_deposit_i64(t0, t0, t1, 48, 16); + opn = "pinsrh_3"; + break; + + case OPC_PEXTRH: + tcg_gen_andi_i64(t1, t1, 3); + tcg_gen_shli_i64(t1, t1, 4); + tcg_gen_shr_i64(t0, t0, t1); + tcg_gen_ext16u_i64(t0, t0); + opn = "pextrh"; + break; + + case OPC_ADDU_CP2: + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_ext32s_i64(t0, t0); + opn = "addu"; + break; + case OPC_SUBU_CP2: + tcg_gen_sub_i64(t0, t0, t1); + tcg_gen_ext32s_i64(t0, t0); + opn = "addu"; + break; + + case OPC_SLL_CP2: + opn = "sll"; + shift_max = 32; + goto do_shift; + case OPC_SRL_CP2: + opn = "srl"; + shift_max = 32; + goto do_shift; + case OPC_SRA_CP2: + opn = "sra"; + shift_max = 32; + goto do_shift; + case OPC_DSLL_CP2: + opn = "dsll"; + shift_max = 64; + goto do_shift; + case OPC_DSRL_CP2: + opn = "dsrl"; + shift_max = 64; + goto do_shift; + case OPC_DSRA_CP2: + opn = "dsra"; + shift_max = 64; + goto do_shift; + do_shift: + /* Make sure shift count isn't TCG undefined behaviour. */ + tcg_gen_andi_i64(t1, t1, shift_max - 1); + + switch (opc) { + case OPC_SLL_CP2: + case OPC_DSLL_CP2: + tcg_gen_shl_i64(t0, t0, t1); + break; + case OPC_SRA_CP2: + case OPC_DSRA_CP2: + /* Since SRA is UndefinedResult without sign-extended inputs, + we can treat SRA and DSRA the same. */ + tcg_gen_sar_i64(t0, t0, t1); + break; + case OPC_SRL_CP2: + /* We want to shift in zeros for SRL; zero-extend first. */ + tcg_gen_ext32u_i64(t0, t0); + /* FALLTHRU */ + case OPC_DSRL_CP2: + tcg_gen_shr_i64(t0, t0, t1); + break; + } + + if (shift_max == 32) { + tcg_gen_ext32s_i64(t0, t0); + } + + /* Shifts larger than MAX produce zero. */ + tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max); + tcg_gen_neg_i64(t1, t1); + tcg_gen_and_i64(t0, t0, t1); + break; + + case OPC_ADD_CP2: + case OPC_DADD_CP2: + { + TCGv_i64 t2 = tcg_temp_new_i64(); + int lab = gen_new_label(); + + tcg_gen_mov_i64(t2, t0); + tcg_gen_add_i64(t0, t1, t2); + if (opc == OPC_ADD_CP2) { + tcg_gen_ext32s_i64(t0, t0); + } + tcg_gen_xor_i64(t1, t1, t2); + tcg_gen_xor_i64(t2, t2, t0); + tcg_gen_andc_i64(t1, t2, t1); + tcg_temp_free_i64(t2); + tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab); + generate_exception(ctx, EXCP_OVERFLOW); + gen_set_label(lab); + + opn = (opc == OPC_ADD_CP2 ? "add" : "dadd"); + break; + } + + case OPC_SUB_CP2: + case OPC_DSUB_CP2: + { + TCGv_i64 t2 = tcg_temp_new_i64(); + int lab = gen_new_label(); + + tcg_gen_mov_i64(t2, t0); + tcg_gen_sub_i64(t0, t1, t2); + if (opc == OPC_SUB_CP2) { + tcg_gen_ext32s_i64(t0, t0); + } + tcg_gen_xor_i64(t1, t1, t2); + tcg_gen_xor_i64(t2, t2, t0); + tcg_gen_and_i64(t1, t1, t2); + tcg_temp_free_i64(t2); + tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab); + generate_exception(ctx, EXCP_OVERFLOW); + gen_set_label(lab); + + opn = (opc == OPC_SUB_CP2 ? "sub" : "dsub"); + break; + } + + case OPC_PMULUW: + tcg_gen_ext32u_i64(t0, t0); + tcg_gen_ext32u_i64(t1, t1); + tcg_gen_mul_i64(t0, t0, t1); + opn = "pmuluw"; + break; + + case OPC_SEQU_CP2: + case OPC_SEQ_CP2: + case OPC_SLTU_CP2: + case OPC_SLT_CP2: + case OPC_SLEU_CP2: + case OPC_SLE_CP2: + /* ??? Document is unclear: Set FCC[CC]. Does that mean the + FD field is the CC field? */ + default: + MIPS_INVAL(opn); + generate_exception(ctx, EXCP_RI); + return; + } + +#undef LMI_HELPER +#undef LMI_DIRECT + + gen_store_fpr64(ctx, t0, rd); + + (void)opn; /* avoid a compiler warning */ + MIPS_DEBUG("%s %s, %s, %s", opn, + fregnames[rd], fregnames[rs], fregnames[rt]); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + /* Traps */ static void gen_trap (DisasContext *ctx, uint32_t opc, int rs, int rt, int16_t imm) @@ -8778,10 +9155,10 @@ static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx, gen_arith_imm(env, ctx, OPC_ADDIU, rx, rx, imm); break; case M16_OPC_SLTI: - gen_slt_imm(env, OPC_SLTI, 24, rx, imm); + gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm); break; case M16_OPC_SLTIU: - gen_slt_imm(env, OPC_SLTIU, 24, rx, imm); + gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm); break; case M16_OPC_I8: switch (funct) { @@ -8992,15 +9369,13 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx, case M16_OPC_SLTI: { int16_t imm = (uint8_t) ctx->opcode; - - gen_slt_imm(env, OPC_SLTI, 24, rx, imm); + gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm); } break; case M16_OPC_SLTIU: { int16_t imm = (uint8_t) ctx->opcode; - - gen_slt_imm(env, OPC_SLTIU, 24, rx, imm); + gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm); } break; case M16_OPC_I8: @@ -9075,8 +9450,7 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx, case M16_OPC_CMPI: { int16_t imm = (uint8_t) ctx->opcode; - - gen_logic_imm(env, OPC_XORI, 24, rx, imm); + gen_logic_imm(env, ctx, OPC_XORI, 24, rx, imm); } break; #if defined(TARGET_MIPS64) @@ -9188,10 +9562,10 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx, } break; case RR_SLT: - gen_slt(env, OPC_SLT, 24, rx, ry); + gen_slt(env, ctx, OPC_SLT, 24, rx, ry); break; case RR_SLTU: - gen_slt(env, OPC_SLTU, 24, rx, ry); + gen_slt(env, ctx, OPC_SLTU, 24, rx, ry); break; case RR_BREAK: generate_exception(ctx, EXCP_BREAK); @@ -9212,22 +9586,22 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx, break; #endif case RR_CMP: - gen_logic(env, OPC_XOR, 24, rx, ry); + gen_logic(env, ctx, OPC_XOR, 24, rx, ry); break; case RR_NEG: gen_arith(env, ctx, OPC_SUBU, rx, 0, ry); break; case RR_AND: - gen_logic(env, OPC_AND, rx, rx, ry); + gen_logic(env, ctx, OPC_AND, rx, rx, ry); break; case RR_OR: - gen_logic(env, OPC_OR, rx, rx, ry); + gen_logic(env, ctx, OPC_OR, rx, rx, ry); break; case RR_XOR: - gen_logic(env, OPC_XOR, rx, rx, ry); + gen_logic(env, ctx, OPC_XOR, rx, rx, ry); break; case RR_NOT: - gen_logic(env, OPC_NOR, rx, ry, 0); + gen_logic(env, ctx, OPC_NOR, rx, ry, 0); break; case RR_MFHI: gen_HILO(ctx, OPC_MFHI, rx); @@ -9849,12 +10223,13 @@ static void gen_andi16 (CPUMIPSState *env, DisasContext *ctx) int rs = mmreg(uMIPS_RS(ctx->opcode)); int encoded = ZIMM(ctx->opcode, 0, 4); - gen_logic_imm(env, OPC_ANDI, rd, rs, decoded_imm[encoded]); + gen_logic_imm(env, ctx, OPC_ANDI, rd, rs, decoded_imm[encoded]); } static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist, int base, int16_t offset) { + const char *opn = "ldst_multiple"; TCGv t0, t1; TCGv_i32 t2; @@ -9874,19 +10249,24 @@ static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist, switch (opc) { case LWM32: gen_helper_lwm(cpu_env, t0, t1, t2); + opn = "lwm"; break; case SWM32: gen_helper_swm(cpu_env, t0, t1, t2); + opn = "swm"; break; #ifdef TARGET_MIPS64 case LDM: gen_helper_ldm(cpu_env, t0, t1, t2); + opn = "ldm"; break; case SDM: gen_helper_sdm(cpu_env, t0, t1, t2); + opn = "sdm"; break; #endif } + (void)opn; MIPS_DEBUG("%s, %x, %d(%s)", opn, reglist, offset, regnames[base]); tcg_temp_free(t0); tcg_temp_free(t1); @@ -9905,25 +10285,25 @@ static void gen_pool16c_insn (CPUMIPSState *env, DisasContext *ctx, int *is_bran case NOT16 + 1: case NOT16 + 2: case NOT16 + 3: - gen_logic(env, OPC_NOR, rd, rs, 0); + gen_logic(env, ctx, OPC_NOR, rd, rs, 0); break; case XOR16 + 0: case XOR16 + 1: case XOR16 + 2: case XOR16 + 3: - gen_logic(env, OPC_XOR, rd, rd, rs); + gen_logic(env, ctx, OPC_XOR, rd, rd, rs); break; case AND16 + 0: case AND16 + 1: case AND16 + 2: case AND16 + 3: - gen_logic(env, OPC_AND, rd, rd, rs); + gen_logic(env, ctx, OPC_AND, rd, rd, rs); break; case OR16 + 0: case OR16 + 1: case OR16 + 2: case OR16 + 3: - gen_logic(env, OPC_OR, rd, rd, rs); + gen_logic(env, ctx, OPC_OR, rd, rd, rs); break; case LWM16 + 0: case LWM16 + 1: @@ -10737,7 +11117,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, case XOR32: mips32_op = OPC_XOR; do_logic: - gen_logic(env, mips32_op, rd, rs, rt); + gen_logic(env, ctx, mips32_op, rd, rs, rt); break; /* Set less than */ case SLT: @@ -10746,7 +11126,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, case SLTU: mips32_op = OPC_SLTU; do_slt: - gen_slt(env, mips32_op, rd, rs, rt); + gen_slt(env, ctx, mips32_op, rd, rs, rt); break; default: goto pool32a_invalid; @@ -10762,7 +11142,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, case MOVZ: mips32_op = OPC_MOVZ; do_cmov: - gen_cond_move(env, mips32_op, rd, rs, rt); + gen_cond_move(env, ctx, mips32_op, rd, rs, rt); break; case LWXS: gen_ldxs(ctx, rs, rt, rd); @@ -11175,7 +11555,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, target. */ break; case LUI: - gen_logic_imm(env, OPC_LUI, rs, -1, imm); + gen_logic_imm(env, ctx, OPC_LUI, rs, -1, imm); break; case SYNCI: break; @@ -11294,7 +11674,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, case ANDI32: mips32_op = OPC_ANDI; do_logici: - gen_logic_imm(env, mips32_op, rt, rs, imm); + gen_logic_imm(env, ctx, mips32_op, rt, rs, imm); break; /* Set less than immediate */ @@ -11304,7 +11684,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, case SLTIU32: mips32_op = OPC_SLTIU; do_slti: - gen_slt_imm(env, mips32_op, rt, rs, imm); + gen_slt_imm(env, ctx, mips32_op, rt, rs, imm); break; case JALX32: offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2; @@ -11781,7 +12161,7 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch) case OPC_MOVZ: check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32 | INSN_LOONGSON2E | INSN_LOONGSON2F); - gen_cond_move(env, op1, rd, rs, rt); + gen_cond_move(env, ctx, op1, rd, rs, rt); break; case OPC_ADD ... OPC_SUBU: gen_arith(env, ctx, op1, rd, rs, rt); @@ -11808,13 +12188,13 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch) break; case OPC_SLT: /* Set on less than */ case OPC_SLTU: - gen_slt(env, op1, rd, rs, rt); + gen_slt(env, ctx, op1, rd, rs, rt); break; case OPC_AND: /* Logic*/ case OPC_OR: case OPC_NOR: case OPC_XOR: - gen_logic(env, op1, rd, rs, rt); + gen_logic(env, ctx, op1, rd, rs, rt); break; case OPC_MULT ... OPC_DIVU: if (sa) { @@ -12215,13 +12595,13 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch) break; case OPC_SLTI: /* Set on less than with immediate opcode */ case OPC_SLTIU: - gen_slt_imm(env, op, rt, rs, imm); + gen_slt_imm(env, ctx, op, rt, rs, imm); break; case OPC_ANDI: /* Arithmetic with immediate opcode */ case OPC_LUI: case OPC_ORI: case OPC_XORI: - gen_logic_imm(env, op, rt, rs, imm); + gen_logic_imm(env, ctx, op, rt, rs, imm); break; case OPC_J ... OPC_JAL: /* Jump */ offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2; @@ -12316,10 +12696,14 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch) case OPC_LDC2: case OPC_SWC2: case OPC_SDC2: - case OPC_CP2: /* COP2: Not implemented. */ generate_exception_err(ctx, EXCP_CpU, 2); break; + case OPC_CP2: + check_insn(env, ctx, INSN_LOONGSON2F); + /* Note that these instructions use different fields. */ + gen_loongson_multimedia(ctx, sa, rd, rt); + break; case OPC_CP3: if (env->CP0_Config1 & (1 << CP0C1_FP)) { diff --git a/target-sh4/helper.h b/target-sh4/helper.h index 6e4f108124..6c1a47da9f 100644 --- a/target-sh4/helper.h +++ b/target-sh4/helper.h @@ -1,30 +1,26 @@ #include "def-helper.h" DEF_HELPER_1(ldtlb, void, env) -DEF_HELPER_1(raise_illegal_instruction, void, env) -DEF_HELPER_1(raise_slot_illegal_instruction, void, env) -DEF_HELPER_1(raise_fpu_disable, void, env) -DEF_HELPER_1(raise_slot_fpu_disable, void, env) -DEF_HELPER_1(debug, void, env) -DEF_HELPER_2(sleep, void, env, i32) -DEF_HELPER_2(trapa, void, env, i32) +DEF_HELPER_1(raise_illegal_instruction, noreturn, env) +DEF_HELPER_1(raise_slot_illegal_instruction, noreturn, env) +DEF_HELPER_1(raise_fpu_disable, noreturn, env) +DEF_HELPER_1(raise_slot_fpu_disable, noreturn, env) +DEF_HELPER_1(debug, noreturn, env) +DEF_HELPER_1(sleep, noreturn, env) +DEF_HELPER_2(trapa, noreturn, env, i32) DEF_HELPER_3(movcal, void, env, i32, i32) DEF_HELPER_1(discard_movcal_backup, void, env) DEF_HELPER_2(ocbi, void, env, i32) -DEF_HELPER_3(addv, i32, env, i32, i32) -DEF_HELPER_3(addc, i32, env, i32, i32) -DEF_HELPER_3(subv, i32, env, i32, i32) -DEF_HELPER_3(subc, i32, env, i32, i32) DEF_HELPER_3(div1, i32, env, i32, i32) DEF_HELPER_3(macl, void, env, i32, i32) DEF_HELPER_3(macw, void, env, i32, i32) DEF_HELPER_2(ld_fpscr, void, env, i32) -DEF_HELPER_1(fabs_FT, f32, f32) -DEF_HELPER_1(fabs_DT, f64, f64) +DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32) +DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64) DEF_HELPER_3(fadd_FT, f32, env, f32, f32) DEF_HELPER_3(fadd_DT, f64, env, f64, f64) DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32) @@ -41,7 +37,7 @@ DEF_HELPER_2(float_DT, f64, env, i32) DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32) DEF_HELPER_3(fmul_FT, f32, env, f32, f32) DEF_HELPER_3(fmul_DT, f64, env, f64, f64) -DEF_HELPER_1(fneg_T, f32, f32) +DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32) DEF_HELPER_3(fsub_FT, f32, env, f32, f32) DEF_HELPER_3(fsub_DT, f64, env, f64, f64) DEF_HELPER_2(fsqrt_FT, f32, env, f32) diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c index 9b4328de37..60ec4cbc4d 100644 --- a/target-sh4/op_helper.c +++ b/target-sh4/op_helper.c @@ -21,7 +21,8 @@ #include "cpu.h" #include "helper.h" -static void cpu_restore_state_from_retaddr(CPUSH4State *env, uintptr_t retaddr) +static inline void cpu_restore_state_from_retaddr(CPUSH4State *env, + uintptr_t retaddr) { TranslationBlock *tb; @@ -77,8 +78,8 @@ void helper_ldtlb(CPUSH4State *env) #endif } -static inline void raise_exception(CPUSH4State *env, int index, - uintptr_t retaddr) +static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index, + uintptr_t retaddr) { env->exception_index = index; cpu_restore_state_from_retaddr(env, retaddr); @@ -87,43 +88,40 @@ static inline void raise_exception(CPUSH4State *env, int index, void helper_raise_illegal_instruction(CPUSH4State *env) { - raise_exception(env, 0x180, GETPC()); + raise_exception(env, 0x180, 0); } void helper_raise_slot_illegal_instruction(CPUSH4State *env) { - raise_exception(env, 0x1a0, GETPC()); + raise_exception(env, 0x1a0, 0); } void helper_raise_fpu_disable(CPUSH4State *env) { - raise_exception(env, 0x800, GETPC()); + raise_exception(env, 0x800, 0); } void helper_raise_slot_fpu_disable(CPUSH4State *env) { - raise_exception(env, 0x820, GETPC()); + raise_exception(env, 0x820, 0); } void helper_debug(CPUSH4State *env) { - env->exception_index = EXCP_DEBUG; - cpu_loop_exit(env); + raise_exception(env, EXCP_DEBUG, 0); } -void helper_sleep(CPUSH4State *env, uint32_t next_pc) +void helper_sleep(CPUSH4State *env) { env->halted = 1; env->in_sleep = 1; - env->exception_index = EXCP_HLT; - env->pc = next_pc; - cpu_loop_exit(env); + raise_exception(env, EXCP_HLT, 0); } void helper_trapa(CPUSH4State *env, uint32_t tra) { env->tra = tra << 2; - raise_exception(env, 0x160, GETPC()); + raise_exception(env, 0x160, 0); } void helper_movcal(CPUSH4State *env, uint32_t address, uint32_t value) @@ -177,51 +175,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) } } -uint32_t helper_addc(CPUSH4State *env, uint32_t arg0, uint32_t arg1) -{ - uint32_t tmp0, tmp1; - - tmp1 = arg0 + arg1; - tmp0 = arg1; - arg1 = tmp1 + (env->sr & 1); - if (tmp0 > tmp1) - env->sr |= SR_T; - else - env->sr &= ~SR_T; - if (tmp1 > arg1) - env->sr |= SR_T; - return arg1; -} - -uint32_t helper_addv(CPUSH4State *env, uint32_t arg0, uint32_t arg1) -{ - uint32_t dest, src, ans; - - if ((int32_t) arg1 >= 0) - dest = 0; - else - dest = 1; - if ((int32_t) arg0 >= 0) - src = 0; - else - src = 1; - src += dest; - arg1 += arg0; - if ((int32_t) arg1 >= 0) - ans = 0; - else - ans = 1; - ans += dest; - if (src == 0 || src == 2) { - if (ans == 1) - env->sr |= SR_T; - else - env->sr &= ~SR_T; - } else - env->sr &= ~SR_T; - return arg1; -} - #define T (env->sr & SR_T) #define Q (env->sr & SR_Q ? 1 : 0) #define M (env->sr & SR_M ? 1 : 0) @@ -375,51 +328,6 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) } } -uint32_t helper_subc(CPUSH4State *env, uint32_t arg0, uint32_t arg1) -{ - uint32_t tmp0, tmp1; - - tmp1 = arg1 - arg0; - tmp0 = arg1; - arg1 = tmp1 - (env->sr & SR_T); - if (tmp0 < tmp1) - env->sr |= SR_T; - else - env->sr &= ~SR_T; - if (tmp1 < arg1) - env->sr |= SR_T; - return arg1; -} - -uint32_t helper_subv(CPUSH4State *env, uint32_t arg0, uint32_t arg1) -{ - int32_t dest, src, ans; - - if ((int32_t) arg1 >= 0) - dest = 0; - else - dest = 1; - if ((int32_t) arg0 >= 0) - src = 0; - else - src = 1; - src += dest; - arg1 -= arg0; - if ((int32_t) arg1 >= 0) - ans = 0; - else - ans = 1; - ans += dest; - if (src == 1) { - if (ans == 1) - env->sr |= SR_T; - else - env->sr &= ~SR_T; - } else - env->sr &= ~SR_T; - return arg1; -} - static inline void set_t(CPUSH4State *env) { env->sr |= SR_T; @@ -475,9 +383,7 @@ static void update_fpscr(CPUSH4State *env, uintptr_t retaddr) cause = (env->fpscr & FPSCR_CAUSE_MASK) >> FPSCR_CAUSE_SHIFT; enable = (env->fpscr & FPSCR_ENABLE_MASK) >> FPSCR_ENABLE_SHIFT; if (cause & enable) { - cpu_restore_state_from_retaddr(env, retaddr); - env->exception_index = 0x120; - cpu_loop_exit(env); + raise_exception(env, 0x120, retaddr); } } } @@ -623,8 +529,7 @@ float64 helper_float_DT(CPUSH4State *env, uint32_t t0) float32 helper_fmac_FT(CPUSH4State *env, float32 t0, float32 t1, float32 t2) { set_float_exception_flags(0, &env->fp_status); - t0 = float32_mul(t0, t1, &env->fp_status); - t0 = float32_add(t0, t2, &env->fp_status); + t0 = float32_muladd(t0, t1, t2, 0, &env->fp_status); update_fpscr(env, GETPC()); return t0; } diff --git a/target-sh4/translate.c b/target-sh4/translate.c index d05c74c8c3..0fa83cab99 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -18,7 +18,6 @@ */ #define DEBUG_DISAS -#define SH4_DEBUG_DISAS //#define SH4_SINGLE_STEP #include "cpu.h" @@ -32,8 +31,6 @@ typedef struct DisasContext { struct TranslationBlock *tb; target_ulong pc; - uint32_t sr; - uint32_t fpscr; uint16_t opcode; uint32_t flags; int bstate; @@ -47,7 +44,7 @@ typedef struct DisasContext { #if defined(CONFIG_USER_ONLY) #define IS_USER(ctx) 1 #else -#define IS_USER(ctx) (!(ctx->sr & SR_MD)) +#define IS_USER(ctx) (!(ctx->flags & SR_MD)) #endif enum { @@ -339,16 +336,6 @@ static void gen_delayed_conditional_jump(DisasContext * ctx) gen_jump(ctx); } -static inline void gen_set_t(void) -{ - tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T); -} - -static inline void gen_clr_t(void) -{ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); -} - static inline void gen_cmp(int cond, TCGv t0, TCGv t1) { TCGv t; @@ -423,44 +410,47 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define B11_8 ((ctx->opcode >> 8) & 0xf) #define B15_12 ((ctx->opcode >> 12) & 0xf) -#define REG(x) ((x) < 8 && (ctx->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB) ? \ - (cpu_gregs[x + 16]) : (cpu_gregs[x])) +#define REG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) == (SR_MD | SR_RB) \ + ? (cpu_gregs[x + 16]) : (cpu_gregs[x])) -#define ALTREG(x) ((x) < 8 && (ctx->sr & (SR_MD | SR_RB)) != (SR_MD | SR_RB) \ +#define ALTREG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) != (SR_MD | SR_RB)\ ? (cpu_gregs[x + 16]) : (cpu_gregs[x])) -#define FREG(x) (ctx->fpscr & FPSCR_FR ? (x) ^ 0x10 : (x)) +#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x)) #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) -#define XREG(x) (ctx->fpscr & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x)) +#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x)) #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */ #define CHECK_NOT_DELAY_SLOT \ if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) \ { \ + tcg_gen_movi_i32(cpu_pc, ctx->pc); \ gen_helper_raise_slot_illegal_instruction(cpu_env); \ - ctx->bstate = BS_EXCP; \ + ctx->bstate = BS_BRANCH; \ return; \ } #define CHECK_PRIVILEGED \ if (IS_USER(ctx)) { \ + tcg_gen_movi_i32(cpu_pc, ctx->pc); \ if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ gen_helper_raise_slot_illegal_instruction(cpu_env); \ } else { \ gen_helper_raise_illegal_instruction(cpu_env); \ } \ - ctx->bstate = BS_EXCP; \ + ctx->bstate = BS_BRANCH; \ return; \ } #define CHECK_FPU_ENABLED \ if (ctx->flags & SR_FD) { \ + tcg_gen_movi_i32(cpu_pc, ctx->pc); \ if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ gen_helper_raise_slot_fpu_disable(cpu_env); \ } else { \ gen_helper_raise_fpu_disable(cpu_env); \ } \ - ctx->bstate = BS_EXCP; \ + ctx->bstate = BS_BRANCH; \ return; \ } @@ -519,7 +509,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S); return; case 0x0008: /* clrt */ - gen_clr_t(); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); return; case 0x0038: /* ldtlb */ CHECK_PRIVILEGED @@ -537,21 +527,22 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S); return; case 0x0018: /* sett */ - gen_set_t(); + tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T); return; case 0xfbfd: /* frchg */ tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR); ctx->bstate = BS_STOP; return; case 0xf3fd: /* fschg */ - tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ); + tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ); ctx->bstate = BS_STOP; return; case 0x0009: /* nop */ return; case 0x001b: /* sleep */ CHECK_PRIVILEGED - gen_helper_sleep(cpu_env, tcg_const_i32(ctx->pc + 2)); + tcg_gen_movi_i32(cpu_pc, ctx->pc + 2); + gen_helper_sleep(cpu_env); return; } @@ -732,17 +723,7 @@ static void _decode_opc(DisasContext * ctx) } return; case 0x6009: /* swap.w Rm,Rn */ - { - TCGv high, low; - high = tcg_temp_new(); - tcg_gen_shli_i32(high, REG(B7_4), 16); - low = tcg_temp_new(); - tcg_gen_shri_i32(low, REG(B7_4), 16); - tcg_gen_ext16u_i32(low, low); - tcg_gen_or_i32(REG(B11_8), high, low); - tcg_temp_free(low); - tcg_temp_free(high); - } + tcg_gen_rotli_i32(REG(B11_8), REG(B7_4), 16); return; case 0x200d: /* xtrct Rm,Rn */ { @@ -751,7 +732,6 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_shli_i32(high, REG(B7_4), 16); low = tcg_temp_new(); tcg_gen_shri_i32(low, REG(B11_8), 16); - tcg_gen_ext16u_i32(low, low); tcg_gen_or_i32(REG(B11_8), high, low); tcg_temp_free(low); tcg_temp_free(high); @@ -761,10 +741,43 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4)); return; case 0x300e: /* addc Rm,Rn */ - gen_helper_addc(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); + { + TCGv t0, t1, t2; + t0 = tcg_temp_new(); + tcg_gen_andi_i32(t0, cpu_sr, SR_T); + t1 = tcg_temp_new(); + tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8)); + tcg_gen_add_i32(t0, t0, t1); + t2 = tcg_temp_new(); + tcg_gen_setcond_i32(TCG_COND_GTU, t2, REG(B11_8), t1); + tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0); + tcg_gen_or_i32(t1, t1, t2); + tcg_temp_free(t2); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_temp_free(t1); + tcg_gen_mov_i32(REG(B11_8), t0); + tcg_temp_free(t0); + } return; case 0x300f: /* addv Rm,Rn */ - gen_helper_addv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); + { + TCGv t0, t1, t2; + t0 = tcg_temp_new(); + tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8)); + t1 = tcg_temp_new(); + tcg_gen_xor_i32(t1, t0, REG(B11_8)); + t2 = tcg_temp_new(); + tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8)); + tcg_gen_andc_i32(t1, t1, t2); + tcg_temp_free(t2); + tcg_gen_shri_i32(t1, t1, 31); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_temp_free(t1); + tcg_gen_mov_i32(REG(B7_4), t0); + tcg_temp_free(t0); + } return; case 0x2009: /* and Rm,Rn */ tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4)); @@ -1013,10 +1026,43 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4)); return; case 0x300a: /* subc Rm,Rn */ - gen_helper_subc(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); + { + TCGv t0, t1, t2; + t0 = tcg_temp_new(); + tcg_gen_andi_i32(t0, cpu_sr, SR_T); + t1 = tcg_temp_new(); + tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4)); + tcg_gen_sub_i32(t0, t1, t0); + t2 = tcg_temp_new(); + tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0); + tcg_gen_or_i32(t1, t1, t2); + tcg_temp_free(t2); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_temp_free(t1); + tcg_gen_mov_i32(REG(B11_8), t0); + tcg_temp_free(t0); + } return; case 0x300b: /* subv Rm,Rn */ - gen_helper_subv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); + { + TCGv t0, t1, t2; + t0 = tcg_temp_new(); + tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4)); + t1 = tcg_temp_new(); + tcg_gen_xor_i32(t1, t0, REG(B7_4)); + t2 = tcg_temp_new(); + tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4)); + tcg_gen_and_i32(t1, t1, t2); + tcg_temp_free(t2); + tcg_gen_shri_i32(t1, t1, 31); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_temp_free(t1); + tcg_gen_mov_i32(REG(B11_8), t0); + tcg_temp_free(t0); + } return; case 0x2008: /* tst Rm,Rn */ { @@ -1031,7 +1077,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(fp, XREG(B7_4)); gen_store_fpr64(fp, XREG(B11_8)); @@ -1042,7 +1088,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); int fr = XREG(B7_4); tcg_gen_addi_i32(addr_hi, REG(B11_8), 4); @@ -1055,7 +1101,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); int fr = XREG(B11_8); tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); @@ -1068,7 +1114,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); int fr = XREG(B11_8); tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); @@ -1083,7 +1129,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { TCGv addr = tcg_temp_new_i32(); int fr = XREG(B7_4); tcg_gen_subi_i32(addr, REG(B11_8), 4); @@ -1106,7 +1152,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new_i32(); tcg_gen_add_i32(addr, REG(B7_4), REG(0)); - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { int fr = XREG(B11_8); tcg_gen_qemu_ld32u(cpu_fregs[fr ], addr, ctx->memidx); tcg_gen_addi_i32(addr, addr, 4); @@ -1122,7 +1168,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_add_i32(addr, REG(B11_8), REG(0)); - if (ctx->fpscr & FPSCR_SZ) { + if (ctx->flags & FPSCR_SZ) { int fr = XREG(B7_4); tcg_gen_qemu_ld32u(cpu_fregs[fr ], addr, ctx->memidx); tcg_gen_addi_i32(addr, addr, 4); @@ -1141,7 +1187,7 @@ static void _decode_opc(DisasContext * ctx) case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */ { CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_PR) { + if (ctx->flags & FPSCR_PR) { TCGv_i64 fp0, fp1; if (ctx->opcode & 0x0110) @@ -1210,7 +1256,7 @@ static void _decode_opc(DisasContext * ctx) case 0xf00e: /* fmac FR0,RM,Rn */ { CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_PR) { + if (ctx->flags & FPSCR_PR) { break; /* illegal instruction */ } else { gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env, @@ -1366,6 +1412,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv imm; CHECK_NOT_DELAY_SLOT + tcg_gen_movi_i32(cpu_pc, ctx->pc); imm = tcg_const_i32(B7_0); gen_helper_trapa(cpu_env, imm); tcg_temp_free(imm); @@ -1605,7 +1652,7 @@ static void _decode_opc(DisasContext * ctx) */ if (ctx->features & SH_FEATURE_SH4A) { int label = gen_new_label(); - gen_clr_t(); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst); tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label); tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); @@ -1739,7 +1786,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_PR) { + if (ctx->flags & FPSCR_PR) { TCGv_i64 fp; if (ctx->opcode & 0x0100) break; /* illegal instruction */ @@ -1754,7 +1801,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_PR) { + if (ctx->flags & FPSCR_PR) { TCGv_i64 fp; if (ctx->opcode & 0x0100) break; /* illegal instruction */ @@ -1775,7 +1822,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf05d: /* fabs FRn/DRn */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_PR) { + if (ctx->flags & FPSCR_PR) { if (ctx->opcode & 0x0100) break; /* illegal instruction */ TCGv_i64 fp = tcg_temp_new_i64(); @@ -1789,7 +1836,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf06d: /* fsqrt FRn */ CHECK_FPU_ENABLED - if (ctx->fpscr & FPSCR_PR) { + if (ctx->flags & FPSCR_PR) { if (ctx->opcode & 0x0100) break; /* illegal instruction */ TCGv_i64 fp = tcg_temp_new_i64(); @@ -1807,13 +1854,13 @@ static void _decode_opc(DisasContext * ctx) break; case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED - if (!(ctx->fpscr & FPSCR_PR)) { + if (!(ctx->flags & FPSCR_PR)) { tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0); } return; case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED - if (!(ctx->fpscr & FPSCR_PR)) { + if (!(ctx->flags & FPSCR_PR)) { tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000); } return; @@ -1837,7 +1884,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf0ed: /* fipr FVm,FVn */ CHECK_FPU_ENABLED - if ((ctx->fpscr & FPSCR_PR) == 0) { + if ((ctx->flags & FPSCR_PR) == 0) { TCGv m, n; m = tcg_const_i32((ctx->opcode >> 8) & 3); n = tcg_const_i32((ctx->opcode >> 10) & 3); @@ -1850,7 +1897,7 @@ static void _decode_opc(DisasContext * ctx) case 0xf0fd: /* ftrv XMTRX,FVn */ CHECK_FPU_ENABLED if ((ctx->opcode & 0x0300) == 0x0100 && - (ctx->fpscr & FPSCR_PR) == 0) { + (ctx->flags & FPSCR_PR) == 0) { TCGv n; n = tcg_const_i32((ctx->opcode >> 10) & 3); gen_helper_ftrv(cpu_env, n); @@ -1864,12 +1911,13 @@ static void _decode_opc(DisasContext * ctx) ctx->opcode, ctx->pc); fflush(stderr); #endif + tcg_gen_movi_i32(cpu_pc, ctx->pc); if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { gen_helper_raise_slot_illegal_instruction(cpu_env); } else { gen_helper_raise_illegal_instruction(cpu_env); } - ctx->bstate = BS_EXCP; + ctx->bstate = BS_BRANCH; } static void decode_opc(DisasContext * ctx) @@ -1923,16 +1971,14 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb, ctx.pc = pc_start; ctx.flags = (uint32_t)tb->flags; ctx.bstate = BS_NONE; - ctx.sr = env->sr; - ctx.fpscr = env->fpscr; - ctx.memidx = (env->sr & SR_MD) == 0 ? 1 : 0; + ctx.memidx = (ctx.flags & SR_MD) == 0 ? 1 : 0; /* We don't know if the delayed pc came from a dynamic or static branch, so assume it is a dynamic branch. */ ctx.delayed_pc = -1; /* use delayed pc from env pointer */ ctx.tb = tb; ctx.singlestep_enabled = env->singlestep_enabled; ctx.features = env->features; - ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA); + ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA); ii = -1; num_insns = 0; @@ -1947,7 +1993,7 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb, /* We have hit a breakpoint - make sure PC is up-to-date */ tcg_gen_movi_i32(cpu_pc, ctx.pc); gen_helper_debug(cpu_env); - ctx.bstate = BS_EXCP; + ctx.bstate = BS_BRANCH; break; } } @@ -2022,9 +2068,6 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb, } #ifdef DEBUG_DISAS -#ifdef SH4_DEBUG_DISAS - qemu_log_mask(CPU_LOG_TB_IN_ASM, "\n"); -#endif if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { qemu_log("IN:\n"); /* , lookup_symbol(pc_start)); */ log_target_disas(pc_start, ctx.pc - pc_start, 0); diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h index 177094ae9a..7348277edc 100644 --- a/target-xtensa/cpu.h +++ b/target-xtensa/cpu.h @@ -36,6 +36,7 @@ #include "config.h" #include "qemu-common.h" #include "cpu-defs.h" +#include "fpu/softfloat.h" #define TARGET_HAS_ICE 1 @@ -325,6 +326,8 @@ typedef struct CPUXtensaState { uint32_t sregs[256]; uint32_t uregs[256]; uint32_t phys_regs[MAX_NAREG]; + float32 fregs[16]; + float_status fp_status; xtensa_tlb_entry itlb[7][MAX_TLB_WAY_SIZE]; xtensa_tlb_entry dtlb[10][MAX_TLB_WAY_SIZE]; @@ -465,6 +468,8 @@ static inline int cpu_mmu_index(CPUXtensaState *env) #define XTENSA_TBFLAG_LITBASE 0x8 #define XTENSA_TBFLAG_DEBUG 0x10 #define XTENSA_TBFLAG_ICOUNT 0x20 +#define XTENSA_TBFLAG_CPENABLE_MASK 0x3fc0 +#define XTENSA_TBFLAG_CPENABLE_SHIFT 6 static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc, target_ulong *cs_base, int *flags) @@ -488,6 +493,9 @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc, *flags |= XTENSA_TBFLAG_ICOUNT; } } + if (xtensa_option_enabled(env->config, XTENSA_OPTION_COPROCESSOR)) { + *flags |= env->sregs[CPENABLE] << XTENSA_TBFLAG_CPENABLE_SHIFT; + } } #include "cpu-all.h" diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h index 152fec044d..4cc0088c36 100644 --- a/target-xtensa/helper.h +++ b/target-xtensa/helper.h @@ -36,4 +36,25 @@ DEF_HELPER_3(wsr_ibreaka, void, env, i32, i32) DEF_HELPER_3(wsr_dbreaka, void, env, i32, i32) DEF_HELPER_3(wsr_dbreakc, void, env, i32, i32) +DEF_HELPER_2(wur_fcr, void, env, i32) +DEF_HELPER_FLAGS_1(abs_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32) +DEF_HELPER_FLAGS_1(neg_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32) +DEF_HELPER_3(add_s, f32, env, f32, f32) +DEF_HELPER_3(sub_s, f32, env, f32, f32) +DEF_HELPER_3(mul_s, f32, env, f32, f32) +DEF_HELPER_4(madd_s, f32, env, f32, f32, f32) +DEF_HELPER_4(msub_s, f32, env, f32, f32, f32) +DEF_HELPER_FLAGS_3(ftoi, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32) +DEF_HELPER_FLAGS_3(ftoui, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32) +DEF_HELPER_3(itof, f32, env, i32, i32) +DEF_HELPER_3(uitof, f32, env, i32, i32) + +DEF_HELPER_4(un_s, void, env, i32, f32, f32) +DEF_HELPER_4(oeq_s, void, env, i32, f32, f32) +DEF_HELPER_4(ueq_s, void, env, i32, f32, f32) +DEF_HELPER_4(olt_s, void, env, i32, f32, f32) +DEF_HELPER_4(ult_s, void, env, i32, f32, f32) +DEF_HELPER_4(ole_s, void, env, i32, f32, f32) +DEF_HELPER_4(ule_s, void, env, i32, f32, f32) + #include "def-helper.h" diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c index 2659c0e00f..ae0c09977b 100644 --- a/target-xtensa/op_helper.c +++ b/target-xtensa/op_helper.c @@ -771,3 +771,137 @@ void HELPER(wsr_dbreakc)(CPUXtensaState *env, uint32_t i, uint32_t v) } env->sregs[DBREAKC + i] = v; } + +void HELPER(wur_fcr)(CPUXtensaState *env, uint32_t v) +{ + static const int rounding_mode[] = { + float_round_nearest_even, + float_round_to_zero, + float_round_up, + float_round_down, + }; + + env->uregs[FCR] = v & 0xfffff07f; + set_float_rounding_mode(rounding_mode[v & 3], &env->fp_status); +} + +float32 HELPER(abs_s)(float32 v) +{ + return float32_abs(v); +} + +float32 HELPER(neg_s)(float32 v) +{ + return float32_chs(v); +} + +float32 HELPER(add_s)(CPUXtensaState *env, float32 a, float32 b) +{ + return float32_add(a, b, &env->fp_status); +} + +float32 HELPER(sub_s)(CPUXtensaState *env, float32 a, float32 b) +{ + return float32_sub(a, b, &env->fp_status); +} + +float32 HELPER(mul_s)(CPUXtensaState *env, float32 a, float32 b) +{ + return float32_mul(a, b, &env->fp_status); +} + +float32 HELPER(madd_s)(CPUXtensaState *env, float32 a, float32 b, float32 c) +{ + return float32_muladd(b, c, a, 0, + &env->fp_status); +} + +float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c) +{ + return float32_muladd(b, c, a, float_muladd_negate_product, + &env->fp_status); +} + +uint32_t HELPER(ftoi)(float32 v, uint32_t rounding_mode, uint32_t scale) +{ + float_status fp_status = {0}; + + set_float_rounding_mode(rounding_mode, &fp_status); + return float32_to_int32( + float32_scalbn(v, scale, &fp_status), &fp_status); +} + +uint32_t HELPER(ftoui)(float32 v, uint32_t rounding_mode, uint32_t scale) +{ + float_status fp_status = {0}; + float32 res; + + set_float_rounding_mode(rounding_mode, &fp_status); + + res = float32_scalbn(v, scale, &fp_status); + + if (float32_is_neg(v) && !float32_is_any_nan(v)) { + return float32_to_int32(res, &fp_status); + } else { + return float32_to_uint32(res, &fp_status); + } +} + +float32 HELPER(itof)(CPUXtensaState *env, uint32_t v, uint32_t scale) +{ + return float32_scalbn(int32_to_float32(v, &env->fp_status), + (int32_t)scale, &env->fp_status); +} + +float32 HELPER(uitof)(CPUXtensaState *env, uint32_t v, uint32_t scale) +{ + return float32_scalbn(uint32_to_float32(v, &env->fp_status), + (int32_t)scale, &env->fp_status); +} + +static inline void set_br(CPUXtensaState *env, bool v, uint32_t br) +{ + if (v) { + env->sregs[BR] |= br; + } else { + env->sregs[BR] &= ~br; + } +} + +void HELPER(un_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + set_br(env, float32_unordered_quiet(a, b, &env->fp_status), br); +} + +void HELPER(oeq_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + set_br(env, float32_eq_quiet(a, b, &env->fp_status), br); +} + +void HELPER(ueq_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + int v = float32_compare_quiet(a, b, &env->fp_status); + set_br(env, v == float_relation_equal || v == float_relation_unordered, br); +} + +void HELPER(olt_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + set_br(env, float32_lt_quiet(a, b, &env->fp_status), br); +} + +void HELPER(ult_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + int v = float32_compare_quiet(a, b, &env->fp_status); + set_br(env, v == float_relation_less || v == float_relation_unordered, br); +} + +void HELPER(ole_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + set_br(env, float32_le_quiet(a, b, &env->fp_status), br); +} + +void HELPER(ule_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b) +{ + int v = float32_compare_quiet(a, b, &env->fp_status); + set_br(env, v != float_relation_greater, br); +} diff --git a/target-xtensa/overlay_tool.h b/target-xtensa/overlay_tool.h index a3a5650fb0..e39505316b 100644 --- a/target-xtensa/overlay_tool.h +++ b/target-xtensa/overlay_tool.h @@ -58,6 +58,7 @@ XCHAL_OPTION(XCHAL_HAVE_SEXT, XTENSA_OPTION_MISC_OP_SEXT) | \ XCHAL_OPTION(XCHAL_HAVE_CLAMPS, XTENSA_OPTION_MISC_OP_CLAMPS) | \ XCHAL_OPTION(XCHAL_HAVE_CP, XTENSA_OPTION_COPROCESSOR) | \ + XCHAL_OPTION(XCHAL_HAVE_BOOLEANS, XTENSA_OPTION_BOOLEAN) | \ XCHAL_OPTION(XCHAL_HAVE_FP, XTENSA_OPTION_FP_COPROCESSOR) | \ XCHAL_OPTION(XCHAL_HAVE_RELEASE_SYNC, XTENSA_OPTION_MP_SYNCHRO) | \ XCHAL_OPTION(XCHAL_HAVE_S32C1I, XTENSA_OPTION_CONDITIONAL_STORE) | \ diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c index 1900bd5d44..ba3ffcb7a4 100644 --- a/target-xtensa/translate.c +++ b/target-xtensa/translate.c @@ -65,11 +65,14 @@ typedef struct DisasContext { bool debug; bool icount; TCGv_i32 next_icount; + + unsigned cpenable; } DisasContext; static TCGv_ptr cpu_env; static TCGv_i32 cpu_pc; static TCGv_i32 cpu_R[16]; +static TCGv_i32 cpu_FR[16]; static TCGv_i32 cpu_SR[256]; static TCGv_i32 cpu_UR[256]; @@ -155,6 +158,12 @@ void xtensa_translate_init(void) "ar8", "ar9", "ar10", "ar11", "ar12", "ar13", "ar14", "ar15", }; + static const char * const fregnames[] = { + "f0", "f1", "f2", "f3", + "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", + "f12", "f13", "f14", "f15", + }; int i; cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); @@ -167,6 +176,12 @@ void xtensa_translate_init(void) regnames[i]); } + for (i = 0; i < 16; i++) { + cpu_FR[i] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUXtensaState, fregs[i]), + fregnames[i]); + } + for (i = 0; i < 256; ++i) { if (sregnames[i]) { cpu_SR[i] = tcg_global_mem_new_i32(TCG_AREG0, @@ -318,6 +333,15 @@ static void gen_check_privilege(DisasContext *dc) } } +static void gen_check_cpenable(DisasContext *dc, unsigned cp) +{ + if (option_enabled(dc, XTENSA_OPTION_COPROCESSOR) && + !(dc->cpenable & (1 << cp))) { + gen_exception_cause(dc, COPROCESSOR0_DISABLED + cp); + dc->is_jmp = DISAS_UPDATE; + } +} + static void gen_jump_slot(DisasContext *dc, TCGv dest, int slot) { tcg_gen_mov_i32(cpu_pc, dest); @@ -566,6 +590,13 @@ static void gen_wsr_dbreakc(DisasContext *dc, uint32_t sr, TCGv_i32 v) } } +static void gen_wsr_cpenable(DisasContext *dc, uint32_t sr, TCGv_i32 v) +{ + tcg_gen_andi_i32(cpu_SR[sr], v, 0xff); + /* This can change tb->flags, so exit tb */ + gen_jumpi_check_loop_end(dc, -1); +} + static void gen_wsr_intset(DisasContext *dc, uint32_t sr, TCGv_i32 v) { tcg_gen_andi_i32(cpu_SR[sr], v, @@ -668,6 +699,7 @@ static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s) [DBREAKA + 1] = gen_wsr_dbreaka, [DBREAKC] = gen_wsr_dbreakc, [DBREAKC + 1] = gen_wsr_dbreakc, + [CPENABLE] = gen_wsr_cpenable, [INTSET] = gen_wsr_intset, [INTCLEAR] = gen_wsr_intclear, [INTENABLE] = gen_wsr_intenable, @@ -692,6 +724,23 @@ static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s) } } +static void gen_wur(uint32_t ur, TCGv_i32 s) +{ + switch (ur) { + case FCR: + gen_helper_wur_fcr(cpu_env, s); + break; + + case FSR: + tcg_gen_andi_i32(cpu_UR[ur], s, 0xffffff80); + break; + + default: + tcg_gen_mov_i32(cpu_UR[ur], s); + break; + } +} + static void gen_load_store_alignment(DisasContext *dc, int shift, TCGv_i32 addr, bool no_hw_alignment) { @@ -1761,13 +1810,11 @@ static void disas_xtensa_insn(DisasContext *dc) case 15: /*WUR*/ gen_window_check1(dc, RRR_T); - { - if (uregnames[RSR_SR]) { - tcg_gen_mov_i32(cpu_UR[RSR_SR], cpu_R[RRR_T]); - } else { - qemu_log("WUR %d not implemented, ", RSR_SR); - TBD(); - } + if (uregnames[RSR_SR]) { + gen_wur(RSR_SR, cpu_R[RRR_T]); + } else { + qemu_log("WUR %d not implemented, ", RSR_SR); + TBD(); } break; @@ -1778,12 +1825,30 @@ static void disas_xtensa_insn(DisasContext *dc) case 5: gen_window_check2(dc, RRR_R, RRR_T); { - int shiftimm = RRR_S | (OP1 << 4); + int shiftimm = RRR_S | ((OP1 & 1) << 4); int maskimm = (1 << (OP2 + 1)) - 1; TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_shri_i32(tmp, cpu_R[RRR_T], shiftimm); - tcg_gen_andi_i32(cpu_R[RRR_R], tmp, maskimm); + + if (shiftimm) { + tcg_gen_shri_i32(tmp, cpu_R[RRR_T], shiftimm); + } else { + tcg_gen_mov_i32(tmp, cpu_R[RRR_T]); + } + + switch (maskimm) { + case 0xff: + tcg_gen_ext8u_i32(cpu_R[RRR_R], tmp); + break; + + case 0xffff: + tcg_gen_ext16u_i32(cpu_R[RRR_R], tmp); + break; + + default: + tcg_gen_andi_i32(cpu_R[RRR_R], tmp, maskimm); + break; + } tcg_temp_free(tmp); } break; @@ -1797,8 +1862,34 @@ static void disas_xtensa_insn(DisasContext *dc) break; case 8: /*LSCXp*/ - HAS_OPTION(XTENSA_OPTION_COPROCESSOR); - TBD(); + switch (OP2) { + case 0: /*LSXf*/ + case 1: /*LSXUf*/ + case 4: /*SSXf*/ + case 5: /*SSXUf*/ + HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR); + gen_window_check2(dc, RRR_S, RRR_T); + gen_check_cpenable(dc, 0); + { + TCGv_i32 addr = tcg_temp_new_i32(); + tcg_gen_add_i32(addr, cpu_R[RRR_S], cpu_R[RRR_T]); + gen_load_store_alignment(dc, 2, addr, false); + if (OP2 & 0x4) { + tcg_gen_qemu_st32(cpu_FR[RRR_R], addr, dc->cring); + } else { + tcg_gen_qemu_ld32u(cpu_FR[RRR_R], addr, dc->cring); + } + if (OP2 & 0x1) { + tcg_gen_mov_i32(cpu_R[RRR_S], addr); + } + tcg_temp_free(addr); + } + break; + + default: /*reserved*/ + RESERVED(); + break; + } break; case 9: /*LSC4*/ @@ -1836,12 +1927,213 @@ static void disas_xtensa_insn(DisasContext *dc) case 10: /*FP0*/ HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR); - TBD(); + switch (OP2) { + case 0: /*ADD.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_add_s(cpu_FR[RRR_R], cpu_env, + cpu_FR[RRR_S], cpu_FR[RRR_T]); + break; + + case 1: /*SUB.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_sub_s(cpu_FR[RRR_R], cpu_env, + cpu_FR[RRR_S], cpu_FR[RRR_T]); + break; + + case 2: /*MUL.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_mul_s(cpu_FR[RRR_R], cpu_env, + cpu_FR[RRR_S], cpu_FR[RRR_T]); + break; + + case 4: /*MADD.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_madd_s(cpu_FR[RRR_R], cpu_env, + cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]); + break; + + case 5: /*MSUB.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_msub_s(cpu_FR[RRR_R], cpu_env, + cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]); + break; + + case 8: /*ROUND.Sf*/ + case 9: /*TRUNC.Sf*/ + case 10: /*FLOOR.Sf*/ + case 11: /*CEIL.Sf*/ + case 14: /*UTRUNC.Sf*/ + gen_window_check1(dc, RRR_R); + gen_check_cpenable(dc, 0); + { + static const unsigned rounding_mode_const[] = { + float_round_nearest_even, + float_round_to_zero, + float_round_down, + float_round_up, + [6] = float_round_to_zero, + }; + TCGv_i32 rounding_mode = tcg_const_i32( + rounding_mode_const[OP2 & 7]); + TCGv_i32 scale = tcg_const_i32(RRR_T); + + if (OP2 == 14) { + gen_helper_ftoui(cpu_R[RRR_R], cpu_FR[RRR_S], + rounding_mode, scale); + } else { + gen_helper_ftoi(cpu_R[RRR_R], cpu_FR[RRR_S], + rounding_mode, scale); + } + + tcg_temp_free(rounding_mode); + tcg_temp_free(scale); + } + break; + + case 12: /*FLOAT.Sf*/ + case 13: /*UFLOAT.Sf*/ + gen_window_check1(dc, RRR_S); + gen_check_cpenable(dc, 0); + { + TCGv_i32 scale = tcg_const_i32(-RRR_T); + + if (OP2 == 13) { + gen_helper_uitof(cpu_FR[RRR_R], cpu_env, + cpu_R[RRR_S], scale); + } else { + gen_helper_itof(cpu_FR[RRR_R], cpu_env, + cpu_R[RRR_S], scale); + } + tcg_temp_free(scale); + } + break; + + case 15: /*FP1OP*/ + switch (RRR_T) { + case 0: /*MOV.Sf*/ + gen_check_cpenable(dc, 0); + tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]); + break; + + case 1: /*ABS.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_abs_s(cpu_FR[RRR_R], cpu_FR[RRR_S]); + break; + + case 4: /*RFRf*/ + gen_window_check1(dc, RRR_R); + gen_check_cpenable(dc, 0); + tcg_gen_mov_i32(cpu_R[RRR_R], cpu_FR[RRR_S]); + break; + + case 5: /*WFRf*/ + gen_window_check1(dc, RRR_S); + gen_check_cpenable(dc, 0); + tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_R[RRR_S]); + break; + + case 6: /*NEG.Sf*/ + gen_check_cpenable(dc, 0); + gen_helper_neg_s(cpu_FR[RRR_R], cpu_FR[RRR_S]); + break; + + default: /*reserved*/ + RESERVED(); + break; + } + break; + + default: /*reserved*/ + RESERVED(); + break; + } break; case 11: /*FP1*/ HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR); - TBD(); + +#define gen_compare(rel, br, a, b) \ + do { \ + TCGv_i32 bit = tcg_const_i32(1 << br); \ + \ + gen_check_cpenable(dc, 0); \ + gen_helper_##rel(cpu_env, bit, cpu_FR[a], cpu_FR[b]); \ + tcg_temp_free(bit); \ + } while (0) + + switch (OP2) { + case 1: /*UN.Sf*/ + gen_compare(un_s, RRR_R, RRR_S, RRR_T); + break; + + case 2: /*OEQ.Sf*/ + gen_compare(oeq_s, RRR_R, RRR_S, RRR_T); + break; + + case 3: /*UEQ.Sf*/ + gen_compare(ueq_s, RRR_R, RRR_S, RRR_T); + break; + + case 4: /*OLT.Sf*/ + gen_compare(olt_s, RRR_R, RRR_S, RRR_T); + break; + + case 5: /*ULT.Sf*/ + gen_compare(ult_s, RRR_R, RRR_S, RRR_T); + break; + + case 6: /*OLE.Sf*/ + gen_compare(ole_s, RRR_R, RRR_S, RRR_T); + break; + + case 7: /*ULE.Sf*/ + gen_compare(ule_s, RRR_R, RRR_S, RRR_T); + break; + +#undef gen_compare + + case 8: /*MOVEQZ.Sf*/ + case 9: /*MOVNEZ.Sf*/ + case 10: /*MOVLTZ.Sf*/ + case 11: /*MOVGEZ.Sf*/ + gen_window_check1(dc, RRR_T); + gen_check_cpenable(dc, 0); + { + static const TCGCond cond[] = { + TCG_COND_NE, + TCG_COND_EQ, + TCG_COND_GE, + TCG_COND_LT + }; + int label = gen_new_label(); + tcg_gen_brcondi_i32(cond[OP2 - 8], cpu_R[RRR_T], 0, label); + tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]); + gen_set_label(label); + } + break; + + case 12: /*MOVF.Sf*/ + case 13: /*MOVT.Sf*/ + HAS_OPTION(XTENSA_OPTION_BOOLEAN); + gen_check_cpenable(dc, 0); + { + int label = gen_new_label(); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_andi_i32(tmp, cpu_SR[BR], 1 << RRR_T); + tcg_gen_brcondi_i32( + OP2 & 1 ? TCG_COND_EQ : TCG_COND_NE, + tmp, 0, label); + tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]); + gen_set_label(label); + tcg_temp_free(tmp); + } + break; + + default: /*reserved*/ + RESERVED(); + break; + } break; default: /*reserved*/ @@ -2072,8 +2364,34 @@ static void disas_xtensa_insn(DisasContext *dc) break; case 3: /*LSCIp*/ - HAS_OPTION(XTENSA_OPTION_COPROCESSOR); - TBD(); + switch (RRI8_R) { + case 0: /*LSIf*/ + case 4: /*SSIf*/ + case 8: /*LSIUf*/ + case 12: /*SSIUf*/ + HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR); + gen_window_check1(dc, RRI8_S); + gen_check_cpenable(dc, 0); + { + TCGv_i32 addr = tcg_temp_new_i32(); + tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2); + gen_load_store_alignment(dc, 2, addr, false); + if (RRI8_R & 0x4) { + tcg_gen_qemu_st32(cpu_FR[RRI8_T], addr, dc->cring); + } else { + tcg_gen_qemu_ld32u(cpu_FR[RRI8_T], addr, dc->cring); + } + if (RRI8_R & 0x8) { + tcg_gen_mov_i32(cpu_R[RRI8_S], addr); + } + tcg_temp_free(addr); + } + break; + + default: /*reserved*/ + RESERVED(); + break; + } break; case 4: /*MAC16d*/ @@ -2502,7 +2820,9 @@ static void disas_xtensa_insn(DisasContext *dc) break; } - gen_check_loop_end(dc, 0); + if (dc->is_jmp == DISAS_NEXT) { + gen_check_loop_end(dc, 0); + } dc->pc = dc->next_pc; return; @@ -2569,6 +2889,8 @@ static void gen_intermediate_code_internal( dc.ccount_delta = 0; dc.debug = tb->flags & XTENSA_TBFLAG_DEBUG; dc.icount = tb->flags & XTENSA_TBFLAG_ICOUNT; + dc.cpenable = (tb->flags & XTENSA_TBFLAG_CPENABLE_MASK) >> + XTENSA_TBFLAG_CPENABLE_SHIFT; init_litbase(&dc); init_sar_tracker(&dc); @@ -2710,6 +3032,16 @@ void cpu_dump_state(CPUXtensaState *env, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, "AR%02d=%08x%c", i, env->phys_regs[i], (i % 4) == 3 ? '\n' : ' '); } + + if (xtensa_option_enabled(env->config, XTENSA_OPTION_FP_COPROCESSOR)) { + cpu_fprintf(f, "\n"); + + for (i = 0; i < 16; ++i) { + cpu_fprintf(f, "F%02d=%08x (%+10.8e)%c", i, + float32_val(env->fregs[i]), + *(float *)&env->fregs[i], (i % 2) == 1 ? '\n' : ' '); + } + } } void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb, int pc_pos) diff --git a/tcg/README b/tcg/README index cfdfd96d09..33783ee17b 100644 --- a/tcg/README +++ b/tcg/README @@ -307,6 +307,12 @@ dest = (t1 cond t2) Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. +* movcond_i32/i64 cond, dest, c1, c2, v1, v2 + +dest = (c1 cond c2 ? v1 : v2) + +Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2. + ********* Type conversions * ext_i32_i64 t0, t1 @@ -386,7 +392,8 @@ Exit the current TB and return the value t0 (word type). Exit the current TB and jump to the TB index 'index' (constant) if the current TB was linked to this TB. Otherwise execute the next -instructions. +instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued +at most once with each slot index per TB. * qemu_ld8u t0, t1, flags qemu_ld8s t0, t1, flags diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index aed3b53247..2bad0a2b17 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -145,12 +145,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 4; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index c0b8f7274d..e2299cadd3 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -73,6 +73,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_GUEST_BASE diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c index 8b81b70f17..2c79c1081e 100644 --- a/tcg/hppa/tcg-target.c +++ b/tcg/hppa/tcg-target.c @@ -175,12 +175,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, *insn_ptr = insn; } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 4; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -820,19 +814,34 @@ static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret, tcg_out32(s, op); } +static TCGCond const tcg_high_cond[] = { + [TCG_COND_EQ] = TCG_COND_EQ, + [TCG_COND_NE] = TCG_COND_NE, + [TCG_COND_LT] = TCG_COND_LT, + [TCG_COND_LE] = TCG_COND_LT, + [TCG_COND_GT] = TCG_COND_GT, + [TCG_COND_GE] = TCG_COND_GT, + [TCG_COND_LTU] = TCG_COND_LTU, + [TCG_COND_LEU] = TCG_COND_LTU, + [TCG_COND_GTU] = TCG_COND_GTU, + [TCG_COND_GEU] = TCG_COND_GTU +}; + static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah, TCGArg bl, int blconst, TCGArg bh, int bhconst, int label_index) { switch (cond) { case TCG_COND_EQ: + tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, al, bl, blconst); + tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index); + break; case TCG_COND_NE: - tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst); - tcg_out_brcond(s, cond, ah, bh, bhconst, label_index); + tcg_out_brcond(s, TCG_COND_NE, al, bl, bhconst, label_index); + tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index); break; - default: - tcg_out_brcond(s, cond, ah, bh, bhconst, label_index); + tcg_out_brcond(s, tcg_high_cond[cond], ah, bh, bhconst, label_index); tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst); tcg_out_brcond(s, tcg_unsigned_cond(cond), al, bl, blconst, label_index); @@ -853,9 +862,8 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret, { int scratch = TCG_REG_R20; - if (ret != al && ret != ah - && (blconst || ret != bl) - && (bhconst || ret != bh)) { + /* Note that the low parts are fully consumed before scratch is set. */ + if (ret != ah && (bhconst || ret != bh)) { scratch = ret; } @@ -867,18 +875,49 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret, tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE); break; - default: + case TCG_COND_GE: + case TCG_COND_GEU: + case TCG_COND_LT: + case TCG_COND_LTU: + /* Optimize compares with low part zero. */ + if (bl == 0) { + tcg_out_setcond(s, cond, ret, ah, bh, bhconst); + return; + } + /* FALLTHRU */ + + case TCG_COND_LE: + case TCG_COND_LEU: + case TCG_COND_GT: + case TCG_COND_GTU: + /* <= : ah < bh | (ah == bh && al <= bl) */ tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst); tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst); tcg_out_movi(s, TCG_TYPE_I32, scratch, 0); - tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst); + tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond[cond]), + TCG_REG_R0, ah, bh, bhconst); tcg_out_movi(s, TCG_TYPE_I32, scratch, 1); break; + + default: + tcg_abort(); } tcg_out_mov(s, TCG_TYPE_I32, ret, scratch); } +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret, + TCGArg c1, TCGArg c2, int c2const, + TCGArg v1, int v1const) +{ + tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const); + if (v1const) { + tcg_out_movi(s, TCG_TYPE_I32, ret, v1); + } else { + tcg_out_mov(s, TCG_TYPE_I32, ret, v1); + } +} + #if defined(CONFIG_SOFTMMU) #include "../../softmmu_defs.h" @@ -943,10 +982,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset); } - /* Compute the value that ought to appear in the TLB for a hit, namely, the page - of the address. We include the low N bits of the address to catch unaligned - accesses and force them onto the slow path. Do this computation after having - issued the load from the TLB slot to give the load time to complete. */ + /* Compute the value that ought to appear in the TLB for a hit, namely, + the page of the address. We include the low N bits of the address + to catch unaligned accesses and force them onto the slow path. Do + this computation after having issued the load from the TLB slot to + give the load time to complete. */ tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); /* If not equal, jump to lab_miss. */ @@ -959,6 +999,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo, return ret; } + +static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst) +{ + if (argno < 4) { + if (vconst) { + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v); + } else { + tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v); + } + } else { + if (vconst && v != 0) { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v); + v = TCG_REG_R20; + } + tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4)); + } + return argno + 1; +} + +static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh) +{ + /* 64-bit arguments must go in even reg pairs and stack slots. */ + if (argno & 1) { + argno++; + } + argno = tcg_out_arg_reg32(s, argno, vl, false); + argno = tcg_out_arg_reg32(s, argno, vh, false); + return argno; +} #endif static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg, @@ -1039,39 +1109,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) /* Note that addrhi_reg is only used for 64-bit guests. */ int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); int mem_index = *args; - int lab1, lab2, argreg, offset; + int lab1, lab2, argno, offset; lab1 = gen_new_label(); lab2 = gen_new_label(); offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); - offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg, - opc & 3, lab1, offset); + offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, + addrhi_reg, opc & 3, lab1, offset); /* TLB Hit. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25), + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, + (offset ? TCG_REG_R1 : TCG_REG_R25), offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset); - tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc); + tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, + TCG_REG_R20, opc); tcg_out_branch(s, lab2, 1); /* TLB Miss. */ /* label1: */ tcg_out_label(s, lab1, s->code_ptr); - argreg = TCG_REG_R26; - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg); + argno = 0; + argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false); if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg); + argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg); + } else { + argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false); } - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); - - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], - TCG_AREG0); + argno = tcg_out_arg_reg32(s, argno, mem_index, true); + tcg_out_call(s, qemu_ld_helpers[opc & 3]); switch (opc) { @@ -1107,8 +1174,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #endif } -static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg, - int addr_reg, int opc) +static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, + int datahi_reg, int addr_reg, int opc) { #ifdef TARGET_WORDS_BIGENDIAN const int bswap = 0; @@ -1161,17 +1228,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) /* Note that addrhi_reg is only used for 64-bit guests. */ int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); int mem_index = *args; - int lab1, lab2, argreg, offset; + int lab1, lab2, argno, next, offset; lab1 = gen_new_label(); lab2 = gen_new_label(); offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg, - opc, lab1, offset); + offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, + addrhi_reg, opc, lab1, offset); /* TLB Hit. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25), + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, + (offset ? TCG_REG_R1 : TCG_REG_R25), offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset); /* There are no indexed stores, so we must do this addition explitly. @@ -1184,63 +1252,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) /* label1: */ tcg_out_label(s, lab1, s->code_ptr); - argreg = TCG_REG_R26; - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg); + argno = 0; + argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false); if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg); + argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg); + } else { + argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false); } + next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20); switch(opc) { case 0: - tcg_out_andi(s, argreg--, datalo_reg, 0xff); - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + tcg_out_andi(s, next, datalo_reg, 0xff); + argno = tcg_out_arg_reg32(s, argno, next, false); break; case 1: - tcg_out_andi(s, argreg--, datalo_reg, 0xffff); - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + tcg_out_andi(s, next, datalo_reg, 0xffff); + argno = tcg_out_arg_reg32(s, argno, next, false); break; case 2: - tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + argno = tcg_out_arg_reg32(s, argno, datalo_reg, false); break; case 3: - /* Because of the alignment required by the 64-bit data argument, - we will always use R23/R24. Also, we will always run out of - argument registers for storing mem_index, so that will have - to go on the stack. */ - if (mem_index == 0) { - argreg = TCG_REG_R0; - } else { - argreg = TCG_REG_R20; - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); - } - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg); - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg); - tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - 4); + argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg); break; default: tcg_abort(); } + argno = tcg_out_arg_reg32(s, argno, mem_index, true); - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], - TCG_AREG0); tcg_out_call(s, qemu_st_helpers[opc]); /* label2: */ tcg_out_label(s, lab2, s->code_ptr); #else - /* There are no indexed stores, so if GUEST_BASE is set we must do the add - explicitly. Careful to avoid R20, which is used for the bswaps to follow. */ + /* There are no indexed stores, so if GUEST_BASE is set we must do + the add explicitly. Careful to avoid R20, which is used for the + bswaps to follow. */ if (GUEST_BASE != 0) { - tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL); + tcg_out_arith(s, TCG_REG_R31, addrlo_reg, + TCG_GUEST_BASE_REG, INSN_ADDL); addrlo_reg = TCG_REG_R31; } tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc); @@ -1475,6 +1526,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[3], const_args[3], args[4], const_args[4]); break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2], + args[3], const_args[3]); + break; + case INDEX_op_add2_i32: tcg_out_add2(s, args[0], args[1], args[2], args[3], args[4], args[5], const_args[4]); @@ -1583,6 +1639,10 @@ static const TCGTargetOpDef hppa_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rI" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } }, + /* ??? We can actually support a signed 14-bit arg3, but we + only have existing constraints for a signed 11-bit. */ + { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } }, { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } }, diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 01ef9605f7..5351353719 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -96,6 +96,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 1 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 34c2df80a6..122d63630c 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -75,9 +75,7 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_R8, TCG_REG_R9, #else - TCG_REG_EAX, - TCG_REG_EDX, - TCG_REG_ECX + /* 32 bit mode uses stack based calling convention (GCC default). */ #endif }; @@ -88,6 +86,18 @@ static const int tcg_target_call_oarg_regs[] = { #endif }; +/* Registers used with L constraint, which are the first argument + registers on x86_64, and two random call clobbered registers on + i386. */ +#if TCG_TARGET_REG_BITS == 64 +# define TCG_REG_L0 tcg_target_call_iarg_regs[0] +# define TCG_REG_L1 tcg_target_call_iarg_regs[1] +# define TCG_REG_L2 tcg_target_call_iarg_regs[2] +#else +# define TCG_REG_L0 TCG_REG_EAX +# define TCG_REG_L1 TCG_REG_EDX +#endif + static uint8_t *tb_ret_addr; static void patch_reloc(uint8_t *code_ptr, int type, @@ -114,16 +124,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - if (TCG_TARGET_REG_BITS == 64) { - return 6; - } - - return 0; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -179,16 +179,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) /* qemu_ld/st address constraint */ case 'L': ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { +#if TCG_TARGET_REG_BITS == 64 tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); - } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2); +#else tcg_regset_set32(ct->u.regs, 0, 0xff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX); - } + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); +#endif break; case 'e': @@ -249,6 +249,7 @@ static inline int tcg_target_const_match(tcg_target_long val, #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) #define OPC_BSWAP (0xc8 | P_EXT) #define OPC_CALL_Jz (0xe8) +#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) #define OPC_DEC_r32 (0x48) #define OPC_IMUL_GvEv (0xaf | P_EXT) @@ -263,6 +264,7 @@ static inline int tcg_target_const_match(tcg_target_long val, #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVB_EvIz (0xc6) #define OPC_MOVL_EvIz (0xc7) #define OPC_MOVL_Iv (0xb8) #define OPC_MOVSBL (0xbe | P_EXT) @@ -935,6 +937,24 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, } #endif +static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg c1, TCGArg c2, int const_c2, + TCGArg v1) +{ + tcg_out_cmp(s, c1, c2, const_c2, 0); + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg c1, TCGArg c2, int const_c2, + TCGArg v1) +{ + tcg_out_cmp(s, c1, c2, const_c2, P_REXW); + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); +} +#endif + static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest) { tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5; @@ -1009,8 +1029,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, uint8_t **label_ptr, int which) { const int addrlo = args[addrlo_idx]; - const int r0 = tcg_target_call_iarg_regs[0]; - const int r1 = tcg_target_call_iarg_regs[1]; + const int r0 = TCG_REG_L0; + const int r1 = TCG_REG_L1; TCGType type = TCG_TYPE_I32; int rexw = 0; @@ -1172,8 +1192,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, - tcg_target_call_iarg_regs[0], 0, opc); + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc); /* jmp label2 */ tcg_out8(s, OPC_JMP_short); @@ -1206,14 +1225,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], mem_index); /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], - TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); #endif tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); @@ -1279,11 +1294,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, use the ADDR32 prefix. For now, do nothing. */ if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, - tcg_target_call_iarg_regs[0], GUEST_BASE); - tgen_arithr(s, ARITH_ADD + P_REXW, - tcg_target_call_iarg_regs[0], base); - base = tcg_target_call_iarg_regs[0]; + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base); + base = TCG_REG_L0; offset = 0; } } @@ -1304,8 +1317,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, /* ??? Ideally we wouldn't need a scratch register. For user-only, we could perform the bswap twice to restore the original value instead of moving to the scratch. But as it is, the L constraint - means that the second argument reg is definitely free here. */ - int scratch = tcg_target_call_iarg_regs[1]; + means that TCG_REG_L1 is definitely free here. */ + const int scratch = TCG_REG_L1; switch (sizeop) { case 0: @@ -1378,8 +1391,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ - tcg_out_qemu_st_direct(s, data_reg, data_reg2, - tcg_target_call_iarg_regs[0], 0, opc); + tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc); /* jmp label2 */ tcg_out8(s, OPC_JMP_short); @@ -1414,18 +1426,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, stack_adjust += 4; #else tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[1], data_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); + TCG_REG_L1, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index); stack_adjust = 0; /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], - TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); #endif tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); @@ -1452,11 +1460,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, use the ADDR32 prefix. For now, do nothing. */ if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, - tcg_target_call_iarg_regs[0], GUEST_BASE); - tgen_arithr(s, ARITH_ADD + P_REXW, - tcg_target_call_iarg_regs[0], base); - base = tcg_target_call_iarg_regs[0]; + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base); + base = TCG_REG_L0; offset = 0; } } @@ -1543,18 +1549,35 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; OP_32_64(st8): - tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, - args[0], args[1], args[2]); + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVB_EvIz, + 0, args[1], args[2]); + tcg_out8(s, args[0]); + } else { + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, + args[0], args[1], args[2]); + } break; OP_32_64(st16): - tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, - args[0], args[1], args[2]); + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, + 0, args[1], args[2]); + tcg_out16(s, args[0]); + } else { + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, + args[0], args[1], args[2]); + } break; #if TCG_TARGET_REG_BITS == 64 case INDEX_op_st32_i64: #endif case INDEX_op_st_i32: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]); + tcg_out32(s, args[0]); + } else { + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + } break; OP_32_64(add): @@ -1650,6 +1673,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond32(s, args[3], args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_movcond_i32: + tcg_out_movcond32(s, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; OP_32_64(bswap16): tcg_out_rolw_8(s, args[0]); @@ -1758,7 +1785,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; case INDEX_op_st_i64: - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, + 0, args[1], args[2]); + tcg_out32(s, args[0]); + } else { + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + } break; case INDEX_op_qemu_ld32s: tcg_out_qemu_ld(s, args, 2 | 4); @@ -1772,6 +1805,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond64(s, args[3], args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_movcond_i64: + tcg_out_movcond64(s, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; case INDEX_op_bswap64_i64: tcg_out_bswap64(s, args[0]); @@ -1820,9 +1857,9 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ld16u_i32, { "r", "r" } }, { INDEX_op_ld16s_i32, { "r", "r" } }, { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "q", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "qi", "r" } }, + { INDEX_op_st16_i32, { "ri", "r" } }, + { INDEX_op_st_i32, { "ri", "r" } }, { INDEX_op_add_i32, { "r", "r", "ri" } }, { INDEX_op_sub_i32, { "r", "0", "ri" } }, @@ -1856,6 +1893,7 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_setcond_i32, { "q", "r", "ri" } }, { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, #if TCG_TARGET_REG_BITS == 32 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, @@ -1873,10 +1911,10 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ld32u_i64, { "r", "r" } }, { INDEX_op_ld32s_i64, { "r", "r" } }, { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "ri", "r" } }, + { INDEX_op_st16_i64, { "ri", "r" } }, + { INDEX_op_st32_i64, { "ri", "r" } }, + { INDEX_op_st_i64, { "re", "r" } }, { INDEX_op_add_i64, { "r", "0", "re" } }, { INDEX_op_mul_i64, { "r", "0", "re" } }, @@ -1910,6 +1948,7 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ext32u_i64, { "r", "r" } }, { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, + { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, #endif #if TCG_TARGET_REG_BITS == 64 @@ -2008,15 +2047,17 @@ static void tcg_target_qemu_prologue(TCGContext *s) #if TCG_TARGET_REG_BITS == 32 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); - tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + + stack_addend); #else tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); -#endif tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); +#endif /* TB epilogue */ tb_ret_addr = s->code_ptr; diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 8be42f3ccb..ace63ba37b 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -67,7 +67,11 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_ESP #define TCG_TARGET_STACK_ALIGN 16 +#if defined(_WIN64) +#define TCG_TARGET_CALL_STACK_OFFSET 32 +#else #define TCG_TARGET_CALL_STACK_OFFSET 0 +#endif /* optional instructions */ #define TCG_TARGET_HAS_div2_i32 1 @@ -86,6 +90,12 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 +#if defined(__x86_64__) || defined(__i686__) +/* Use cmov only if the compiler is already doing so. */ +#define TCG_TARGET_HAS_movcond_i32 1 +#else +#define TCG_TARGET_HAS_movcond_i32 0 +#endif #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -107,6 +117,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_movcond_i64 1 #endif #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 1745038f14..dc9c12cf18 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -176,12 +176,6 @@ static const int tcg_target_call_oarg_regs[] = { TCG_REG_R8 }; -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 8; -} - /* * opcode formation */ diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index c22962ac12..368aee4196 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -133,6 +133,8 @@ typedef enum { #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i64 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */ diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 74db83d440..f70910a90f 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -68,7 +68,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif /* check if we really need so many registers :P */ -static const int tcg_target_reg_alloc_order[] = { +static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_S0, TCG_REG_S1, TCG_REG_S2, @@ -94,14 +94,14 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_V1 }; -static const int tcg_target_call_iarg_regs[4] = { +static const TCGReg tcg_target_call_iarg_regs[4] = { TCG_REG_A0, TCG_REG_A1, TCG_REG_A2, TCG_REG_A3 }; -static const int tcg_target_call_oarg_regs[2] = { +static const TCGReg tcg_target_call_oarg_regs[2] = { TCG_REG_V0, TCG_REG_V1 }; @@ -185,12 +185,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 4; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -278,6 +272,8 @@ static inline int tcg_target_const_match(tcg_target_long val, enum { OPC_BEQ = 0x04 << 26, OPC_BNE = 0x05 << 26, + OPC_BLEZ = 0x06 << 26, + OPC_BGTZ = 0x07 << 26, OPC_ADDIU = 0x09 << 26, OPC_SLTI = 0x0A << 26, OPC_SLTIU = 0x0B << 26, @@ -298,12 +294,16 @@ enum { OPC_SPECIAL = 0x00 << 26, OPC_SLL = OPC_SPECIAL | 0x00, OPC_SRL = OPC_SPECIAL | 0x02, + OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02, OPC_SRA = OPC_SPECIAL | 0x03, OPC_SLLV = OPC_SPECIAL | 0x04, OPC_SRLV = OPC_SPECIAL | 0x06, + OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, OPC_SRAV = OPC_SPECIAL | 0x07, OPC_JR = OPC_SPECIAL | 0x08, OPC_JALR = OPC_SPECIAL | 0x09, + OPC_MOVZ = OPC_SPECIAL | 0x0A, + OPC_MOVN = OPC_SPECIAL | 0x0B, OPC_MFHI = OPC_SPECIAL | 0x10, OPC_MFLO = OPC_SPECIAL | 0x12, OPC_MULT = OPC_SPECIAL | 0x18, @@ -319,7 +319,13 @@ enum { OPC_SLT = OPC_SPECIAL | 0x2A, OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_REGIMM = 0x01 << 26, + OPC_BLTZ = OPC_REGIMM | (0x00 << 16), + OPC_BGEZ = OPC_REGIMM | (0x01 << 16), + OPC_SPECIAL3 = 0x1f << 26, + OPC_INS = OPC_SPECIAL3 | 0x004, + OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, }; @@ -327,7 +333,8 @@ enum { /* * Type reg */ -static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int rt) +static inline void tcg_out_opc_reg(TCGContext *s, int opc, + TCGReg rd, TCGReg rs, TCGReg rt) { int32_t inst; @@ -341,7 +348,8 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int r /* * Type immediate */ -static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int imm) +static inline void tcg_out_opc_imm(TCGContext *s, int opc, + TCGReg rt, TCGReg rs, TCGArg imm) { int32_t inst; @@ -355,7 +363,8 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int i /* * Type branch */ -static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs) +static inline void tcg_out_opc_br(TCGContext *s, int opc, + TCGReg rt, TCGReg rs) { /* We pay attention here to not modify the branch target by reading the existing value and using it again. This ensure that caches and @@ -368,7 +377,8 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs) /* * Type sa */ -static inline void tcg_out_opc_sa(TCGContext *s, int opc, int rd, int rt, int sa) +static inline void tcg_out_opc_sa(TCGContext *s, int opc, + TCGReg rd, TCGReg rt, TCGArg sa) { int32_t inst; @@ -407,38 +417,47 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg) +static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) { +#ifdef _MIPS_ARCH_MIPS32R2 + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); +#else /* ret and arg can't be register at */ if (ret == TCG_REG_AT || arg == TCG_REG_AT) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +#endif } -static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg) +static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) { +#ifdef _MIPS_ARCH_MIPS32R2 + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); +#else /* ret and arg can't be register at */ if (ret == TCG_REG_AT || arg == TCG_REG_AT) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +#endif } -static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg) +static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) { +#ifdef _MIPS_ARCH_MIPS32R2 + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); +#else /* ret and arg must be different and can't be register at */ if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { tcg_abort(); @@ -456,9 +475,10 @@ static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg) tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +#endif } -static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) +static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) { #ifdef _MIPS_ARCH_MIPS32R2 tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); @@ -468,7 +488,7 @@ static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) #endif } -static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) +static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) { #ifdef _MIPS_ARCH_MIPS32R2 tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); @@ -478,8 +498,8 @@ static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) #endif } -static inline void tcg_out_ldst(TCGContext *s, int opc, int arg, - int arg1, tcg_target_long arg2) +static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, + TCGReg arg1, TCGArg arg2) { if (arg2 == (int16_t) arg2) { tcg_out_opc_imm(s, opc, arg, arg1, arg2); @@ -502,7 +522,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); } -static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) { if (val == (int16_t)val) { tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); @@ -543,7 +563,7 @@ DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg) #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG #define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ tcg_out_movi(s, TCG_TYPE_I32, A, arg); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, uint32_t arg) +DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg) #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG /* We don't use the macro for this one to avoid an unnecessary reg-reg @@ -573,8 +593,8 @@ static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num, #endif } -static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1, - int arg2, int label_index) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, int label_index) { TCGLabel *l = &s->labels[label_index]; @@ -586,32 +606,48 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1, tcg_out_opc_br(s, OPC_BNE, arg1, arg2); break; case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BLTZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); + tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_LTU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); break; case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BGEZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_GEU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); break; case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BLEZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_LEU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); break; case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BGTZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); + tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_GTU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); @@ -631,8 +667,9 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1, /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1, - int arg2, int arg3, int arg4, int label_index) +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, TCGArg arg3, TCGArg arg4, + int label_index) { void *label_ptr; @@ -694,8 +731,70 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1, reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr); } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret, - int arg1, int arg2) +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGArg c1, TCGArg c2, TCGArg v) +{ + switch (cond) { + case TCG_COND_EQ: + if (c1 == 0) { + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2); + } else if (c2 == 0) { + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1); + } else { + tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + } + break; + case TCG_COND_NE: + if (c1 == 0) { + tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2); + } else if (c2 == 0) { + tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1); + } else { + tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + } + break; + case TCG_COND_LT: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + case TCG_COND_LTU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + case TCG_COND_GE: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_GEU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_LE: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_LEU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_GT: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + case TCG_COND_GTU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + default: + tcg_abort(); + break; + } +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGArg arg1, TCGArg arg2) { switch (cond) { case TCG_COND_EQ: @@ -754,8 +853,8 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret, /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret, - int arg1, int arg2, int arg3, int arg4) +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4) { switch (cond) { case TCG_COND_EQ: @@ -842,18 +941,17 @@ static const void * const qemu_st_helpers[4] = { static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { - int addr_regl, addr_meml; - int data_regl, data_regh, data_reg1, data_reg2; - int mem_index, s_bits; + TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; #if defined(CONFIG_SOFTMMU) void *label1_ptr, *label2_ptr; int arg_num; -#endif -#if TARGET_LONG_BITS == 64 -# if defined(CONFIG_SOFTMMU) + int mem_index, s_bits; + int addr_meml; +# if TARGET_LONG_BITS == 64 uint8_t *label3_ptr; + TCGReg addr_regh; + int addr_memh; # endif - int addr_regh, addr_memh; #endif data_regl = *args++; if (opc == 3) @@ -861,11 +959,22 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, else data_regh = 0; addr_regl = *args++; -#if TARGET_LONG_BITS == 64 +#if defined(CONFIG_SOFTMMU) +# if TARGET_LONG_BITS == 64 addr_regh = *args++; -#endif +# if defined(TCG_TARGET_WORDS_BIGENDIAN) + addr_memh = 0; + addr_meml = 4; +# else + addr_memh = 4; + addr_meml = 0; +# endif +# else + addr_meml = 0; +# endif mem_index = *args; s_bits = opc & 3; +#endif if (opc == 3) { #if defined(TCG_TARGET_WORDS_BIGENDIAN) @@ -879,18 +988,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, data_reg1 = data_regl; data_reg2 = 0; } -#if TARGET_LONG_BITS == 64 -# if defined(TCG_TARGET_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -#else - addr_meml = 0; -#endif - #if defined(CONFIG_SOFTMMU) tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); @@ -1029,50 +1126,56 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { - int addr_regl, addr_meml; - int data_regl, data_regh, data_reg1, data_reg2; - int mem_index, s_bits; + TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; #if defined(CONFIG_SOFTMMU) uint8_t *label1_ptr, *label2_ptr; int arg_num; + int mem_index, s_bits; + int addr_meml; #endif #if TARGET_LONG_BITS == 64 # if defined(CONFIG_SOFTMMU) uint8_t *label3_ptr; + TCGReg addr_regh; + int addr_memh; # endif - int addr_regh, addr_memh; #endif - data_regl = *args++; if (opc == 3) { data_regh = *args++; -#if defined(TCG_TARGET_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; -#else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif } else { - data_reg1 = data_regl; - data_reg2 = 0; data_regh = 0; } addr_regl = *args++; -#if TARGET_LONG_BITS == 64 +#if defined(CONFIG_SOFTMMU) +# if TARGET_LONG_BITS == 64 addr_regh = *args++; -# if defined(TCG_TARGET_WORDS_BIGENDIAN) +# if defined(TCG_TARGET_WORDS_BIGENDIAN) addr_memh = 0; addr_meml = 4; -# else +# else addr_memh = 4; addr_meml = 0; -# endif -#else +# endif +# else addr_meml = 0; -#endif +# endif mem_index = *args; s_bits = opc; +#endif + + if (opc == 3) { +#if defined(TCG_TARGET_WORDS_BIGENDIAN) + data_reg1 = data_regh; + data_reg2 = data_regl; +#else + data_reg1 = data_regl; + data_reg2 = data_regh; +#endif + } else { + data_reg1 = data_regl; + data_reg2 = 0; + } #if defined(CONFIG_SOFTMMU) tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); @@ -1157,7 +1260,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, break; case 1: if (TCG_NEED_BSWAP) { - tcg_out_bswap16(s, TCG_REG_T0, data_reg1); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff); + tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0); } else { tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0); @@ -1377,6 +1481,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]); } break; + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]); + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32); + tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]); + tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]); + } else { + tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]); + } + break; + + /* The bswap routines do not work on non-R2 CPU. In that case + we let TCG generating the corresponding code. */ + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, args[0], args[1]); + break; case INDEX_op_ext8s_i32: tcg_out_ext8s(s, args[0], args[1]); @@ -1385,6 +1514,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_ext16s(s, args[0], args[1]); break; + case INDEX_op_deposit_i32: + tcg_out_opc_imm(s, OPC_INS, args[0], args[2], + ((args[3] + args[4] - 1) << 11) | (args[3] << 6)); + break; + case INDEX_op_brcond_i32: tcg_out_brcond(s, args[2], args[0], args[1], args[3]); break; @@ -1392,6 +1526,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]); break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]); + break; + case INDEX_op_setcond_i32: tcg_out_setcond(s, args[3], args[0], args[1], args[2]); break; @@ -1453,34 +1591,42 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_st16_i32, { "rZ", "r" } }, { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_add_i32, { "r", "rZ", "rJZ" } }, + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJZ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i32, { "r", "rZ", "rIZ" } }, + { INDEX_op_and_i32, { "r", "rZ", "rI" } }, { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, { INDEX_op_not_i32, { "r", "rZ" } }, { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, - { INDEX_op_shl_i32, { "r", "rZ", "riZ" } }, - { INDEX_op_shr_i32, { "r", "rZ", "riZ" } }, - { INDEX_op_sar_i32, { "r", "rZ", "riZ" } }, + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, { INDEX_op_ext8s_i32, { "r", "rZ" } }, { INDEX_op_ext16s_i32, { "r", "rZ" } }, + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, #if TARGET_LONG_BITS == 32 @@ -1520,7 +1666,6 @@ static int tcg_target_callee_save_regs[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, - TCG_REG_GP, TCG_REG_FP, TCG_REG_RA, /* should be last for ABI compliance */ }; @@ -1530,11 +1675,15 @@ static void tcg_target_qemu_prologue(TCGContext *s) { int i, frame_size; - /* reserve some stack space */ + /* reserve some stack space, also for TCG temps. */ frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 - + TCG_STATIC_CALL_ARGS_SIZE; + + TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 + + TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); /* TB prologue */ tcg_out_addi(s, TCG_REG_SP, -frame_size); @@ -1586,8 +1735,7 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ tcg_add_target_add_op_defs(mips_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), - CPU_TEMP_BUF_NLONGS * sizeof(long)); } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 1c6193180c..d147e70eb1 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -80,16 +80,34 @@ typedef enum { #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_andc_i32 0 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 + +/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ +#if defined(_MIPS_ARCH_MIPS4) || defined(_MIPS_ARCH_MIPS32) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_LOONGSON2E) || \ + defined(_MIPS_ARCH_LOONGSON2F) +#define TCG_TARGET_HAS_movcond_i32 1 +#else +#define TCG_TARGET_HAS_movcond_i32 0 +#endif + +/* optional instructions only implemented on MIPS32R2 */ +#ifdef _MIPS_ARCH_MIPS32R2 +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#else +#define TCG_TARGET_HAS_bswap16_i32 0 +#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#endif /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ diff --git a/tcg/optimize.c b/tcg/optimize.c index fba0ed9592..35532a1e03 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -39,8 +39,6 @@ typedef enum { TCG_TEMP_UNDEF = 0, TCG_TEMP_CONST, TCG_TEMP_COPY, - TCG_TEMP_HAS_COPY, - TCG_TEMP_ANY } tcg_temp_state; struct tcg_temp_info { @@ -52,39 +50,19 @@ struct tcg_temp_info { static struct tcg_temp_info temps[TCG_MAX_TEMPS]; -/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some - class of equivalent temp's, a new representative should be chosen in this - class. */ -static void reset_temp(TCGArg temp, int nb_temps, int nb_globals) +/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove + the copy flag from the left temp. */ +static void reset_temp(TCGArg temp) { - int i; - TCGArg new_base = (TCGArg)-1; - if (temps[temp].state == TCG_TEMP_HAS_COPY) { - for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { - if (i >= nb_globals) { - temps[i].state = TCG_TEMP_HAS_COPY; - new_base = i; - break; - } - } - for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { - if (new_base == (TCGArg)-1) { - temps[i].state = TCG_TEMP_ANY; - } else { - temps[i].val = new_base; - } + if (temps[temp].state == TCG_TEMP_COPY) { + if (temps[temp].prev_copy == temps[temp].next_copy) { + temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; + } else { + temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; + temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; } - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - } else if (temps[temp].state == TCG_TEMP_COPY) { - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - new_base = temps[temp].val; - } - temps[temp].state = TCG_TEMP_ANY; - if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) { - temps[new_base].state = TCG_TEMP_ANY; } + temps[temp].state = TCG_TEMP_UNDEF; } static int op_bits(TCGOpcode op) @@ -107,36 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op) } } -static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst, - TCGArg src, int nb_temps, int nb_globals) +static TCGArg find_better_copy(TCGContext *s, TCGArg temp) { - reset_temp(dst, nb_temps, nb_globals); - assert(temps[src].state != TCG_TEMP_COPY); - /* Don't try to copy if one of temps is a global or either one - is local and another is register */ - if (src >= nb_globals && dst >= nb_globals && - tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) { - assert(temps[src].state != TCG_TEMP_CONST); - if (temps[src].state != TCG_TEMP_HAS_COPY) { - temps[src].state = TCG_TEMP_HAS_COPY; + TCGArg i; + + /* If this is already a global, we can't do better. */ + if (temp < s->nb_globals) { + return temp; + } + + /* Search for a global first. */ + for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { + if (i < s->nb_globals) { + return i; + } + } + + /* If it is a temp, search for a temp local. */ + if (!s->temps[temp].temp_local) { + for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { + if (s->temps[i].temp_local) { + return i; + } + } + } + + /* Failure to find a better representation, return the same temp. */ + return temp; +} + +static bool temps_are_copies(TCGArg arg1, TCGArg arg2) +{ + TCGArg i; + + if (arg1 == arg2) { + return true; + } + + if (temps[arg1].state != TCG_TEMP_COPY + || temps[arg2].state != TCG_TEMP_COPY) { + return false; + } + + for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { + if (i == arg2) { + return true; + } + } + + return false; +} + +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, + TCGArg dst, TCGArg src) +{ + reset_temp(dst); + assert(temps[src].state != TCG_TEMP_CONST); + + if (s->temps[src].type == s->temps[dst].type) { + if (temps[src].state != TCG_TEMP_COPY) { + temps[src].state = TCG_TEMP_COPY; temps[src].next_copy = src; temps[src].prev_copy = src; } temps[dst].state = TCG_TEMP_COPY; - temps[dst].val = src; temps[dst].next_copy = temps[src].next_copy; temps[dst].prev_copy = src; temps[temps[dst].next_copy].prev_copy = dst; temps[src].next_copy = dst; } + gen_args[0] = dst; gen_args[1] = src; } -static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val, - int nb_temps, int nb_globals) +static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) { - reset_temp(dst, nb_temps, nb_globals); + reset_temp(dst); temps[dst].state = TCG_TEMP_CONST; temps[dst].val = val; gen_args[0] = dst; @@ -267,58 +292,88 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) return res; } +/* Return 2 if the condition can't be simplified, and the result + of the condition (0 or 1) if it can */ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, TCGArg y, TCGCond c) { - switch (op_bits(op)) { - case 32: + if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { + switch (op_bits(op)) { + case 32: + switch (c) { + case TCG_COND_EQ: + return (uint32_t)temps[x].val == (uint32_t)temps[y].val; + case TCG_COND_NE: + return (uint32_t)temps[x].val != (uint32_t)temps[y].val; + case TCG_COND_LT: + return (int32_t)temps[x].val < (int32_t)temps[y].val; + case TCG_COND_GE: + return (int32_t)temps[x].val >= (int32_t)temps[y].val; + case TCG_COND_LE: + return (int32_t)temps[x].val <= (int32_t)temps[y].val; + case TCG_COND_GT: + return (int32_t)temps[x].val > (int32_t)temps[y].val; + case TCG_COND_LTU: + return (uint32_t)temps[x].val < (uint32_t)temps[y].val; + case TCG_COND_GEU: + return (uint32_t)temps[x].val >= (uint32_t)temps[y].val; + case TCG_COND_LEU: + return (uint32_t)temps[x].val <= (uint32_t)temps[y].val; + case TCG_COND_GTU: + return (uint32_t)temps[x].val > (uint32_t)temps[y].val; + } + break; + case 64: + switch (c) { + case TCG_COND_EQ: + return (uint64_t)temps[x].val == (uint64_t)temps[y].val; + case TCG_COND_NE: + return (uint64_t)temps[x].val != (uint64_t)temps[y].val; + case TCG_COND_LT: + return (int64_t)temps[x].val < (int64_t)temps[y].val; + case TCG_COND_GE: + return (int64_t)temps[x].val >= (int64_t)temps[y].val; + case TCG_COND_LE: + return (int64_t)temps[x].val <= (int64_t)temps[y].val; + case TCG_COND_GT: + return (int64_t)temps[x].val > (int64_t)temps[y].val; + case TCG_COND_LTU: + return (uint64_t)temps[x].val < (uint64_t)temps[y].val; + case TCG_COND_GEU: + return (uint64_t)temps[x].val >= (uint64_t)temps[y].val; + case TCG_COND_LEU: + return (uint64_t)temps[x].val <= (uint64_t)temps[y].val; + case TCG_COND_GTU: + return (uint64_t)temps[x].val > (uint64_t)temps[y].val; + } + break; + } + } else if (temps_are_copies(x, y)) { switch (c) { - case TCG_COND_EQ: - return (uint32_t)x == (uint32_t)y; - case TCG_COND_NE: - return (uint32_t)x != (uint32_t)y; - case TCG_COND_LT: - return (int32_t)x < (int32_t)y; - case TCG_COND_GE: - return (int32_t)x >= (int32_t)y; - case TCG_COND_LE: - return (int32_t)x <= (int32_t)y; case TCG_COND_GT: - return (int32_t)x > (int32_t)y; case TCG_COND_LTU: - return (uint32_t)x < (uint32_t)y; - case TCG_COND_GEU: - return (uint32_t)x >= (uint32_t)y; - case TCG_COND_LEU: - return (uint32_t)x <= (uint32_t)y; + case TCG_COND_LT: case TCG_COND_GTU: - return (uint32_t)x > (uint32_t)y; - } - break; - case 64: - switch (c) { - case TCG_COND_EQ: - return (uint64_t)x == (uint64_t)y; case TCG_COND_NE: - return (uint64_t)x != (uint64_t)y; - case TCG_COND_LT: - return (int64_t)x < (int64_t)y; + return 0; case TCG_COND_GE: - return (int64_t)x >= (int64_t)y; + case TCG_COND_GEU: case TCG_COND_LE: - return (int64_t)x <= (int64_t)y; - case TCG_COND_GT: - return (int64_t)x > (int64_t)y; + case TCG_COND_LEU: + case TCG_COND_EQ: + return 1; + } + } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { + switch (c) { case TCG_COND_LTU: - return (uint64_t)x < (uint64_t)y; + return 0; case TCG_COND_GEU: - return (uint64_t)x >= (uint64_t)y; - case TCG_COND_LEU: - return (uint64_t)x <= (uint64_t)y; - case TCG_COND_GTU: - return (uint64_t)x > (uint64_t)y; + return 1; + default: + return 2; } - break; + } else { + return 2; } fprintf(stderr, @@ -327,7 +382,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, tcg_abort(); } - /* Propagate constants and copies, fold constant expressions. */ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, TCGOpDef *tcg_op_defs) @@ -337,12 +391,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, const TCGOpDef *def; TCGArg *gen_args; TCGArg tmp; + TCGCond cond; + /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. - If this temp is a copy of other ones then this equivalence class' - representative is kept in VALS' element. - If this temp is neither copy nor constant then corresponding VALS' - element is unused. */ + If this temp is a copy of other ones then the other copies are + available through the doubly linked circular list. */ nb_temps = s->nb_temps; nb_globals = s->nb_globals; @@ -354,11 +408,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, op = gen_opc_buf[op_index]; def = &tcg_op_defs[op]; /* Do copy propagation */ - if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) { - assert(op != INDEX_op_call); + if (op == INDEX_op_call) { + int nb_oargs = args[0] >> 16; + int nb_iargs = args[0] & 0xffff; + for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { + if (temps[args[i]].state == TCG_TEMP_COPY) { + args[i] = find_better_copy(s, args[i]); + } + } + } else { for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { if (temps[args[i]].state == TCG_TEMP_COPY) { - args[i] = temps[args[i]].val; + args[i] = find_better_copy(s, args[i]); } } } @@ -373,7 +434,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): - if (temps[args[1]].state == TCG_TEMP_CONST) { + /* Prefer the constant in second argument, and then the form + op a, a, b, which is better handled on non-RISC hosts. */ + if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2] + && temps[args[2]].state != TCG_TEMP_CONST)) { tmp = args[1]; args[1] = args[2]; args[2] = tmp; @@ -397,6 +461,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[3] = tcg_swap_cond(args[3]); } break; + CASE_OP_32_64(movcond): + cond = args[5]; + if (temps[args[1]].state == TCG_TEMP_CONST + && temps[args[2]].state != TCG_TEMP_CONST) { + tmp = args[1]; + args[1] = args[2]; + args[2] = tmp; + cond = tcg_swap_cond(cond); + } + /* For movcond, we canonicalize the "false" input reg to match + the destination reg so that the tcg backend can implement + a "move if true" operation. */ + if (args[0] == args[3]) { + tmp = args[3]; + args[3] = args[4]; + args[4] = tmp; + cond = tcg_invert_cond(cond); + } + args[5] = cond; default: break; } @@ -411,7 +494,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], 0); args += 3; gen_args += 2; continue; @@ -438,14 +521,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } if (temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0) { - if ((temps[args[0]].state == TCG_TEMP_COPY - && temps[args[0]].val == args[1]) - || args[0] == args[1]) { + if (temps_are_copies(args[0], args[1])) { gen_opc_buf[op_index] = INDEX_op_nop; } else { gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1], - nb_temps, nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); gen_args += 2; } args += 3; @@ -463,7 +543,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], 0); args += 3; gen_args += 2; continue; @@ -477,13 +557,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, switch (op) { CASE_OP_32_64(or): CASE_OP_32_64(and): - if (args[1] == args[2]) { - if (args[1] == args[0]) { + if (temps_are_copies(args[1], args[2])) { + if (temps_are_copies(args[0], args[1])) { gen_opc_buf[op_index] = INDEX_op_nop; } else { gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps, - nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); gen_args += 2; } args += 3; @@ -494,21 +573,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } + /* Simplify expression for "op r, a, a => movi r, 0" cases */ + switch (op) { + CASE_OP_32_64(sub): + CASE_OP_32_64(xor): + if (temps_are_copies(args[1], args[2])) { + gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], 0); + gen_args += 2; + args += 3; + continue; + } + break; + default: + break; + } + /* Propagate constants through copy operations and do constant folding. Constants will be substituted to arguments by register allocator where needed and possible. Also detect copies. */ switch (op) { CASE_OP_32_64(mov): - if ((temps[args[1]].state == TCG_TEMP_COPY - && temps[args[1]].val == args[0]) - || args[0] == args[1]) { + if (temps_are_copies(args[0], args[1])) { args += 2; gen_opc_buf[op_index] = INDEX_op_nop; break; } if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, gen_args, args[0], args[1], - nb_temps, nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); gen_args += 2; args += 2; break; @@ -520,7 +612,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[1] = temps[args[1]].val; /* fallthrough */ CASE_OP_32_64(movi): - tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], args[1]); gen_args += 2; args += 2; break; @@ -535,9 +627,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps[args[1]].state == TCG_TEMP_CONST) { gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, 0); - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], tmp); } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; } @@ -565,10 +657,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -576,16 +668,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } args += 3; break; - CASE_OP_32_64(setcond): + CASE_OP_32_64(deposit): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { gen_opc_buf[op_index] = op_to_movi(op); - tmp = do_constant_folding_cond(op, temps[args[1]].val, - temps[args[2]].val, args[3]); - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); + tmp = ((1ull << args[4]) - 1); + tmp = (temps[args[1]].val & ~(tmp << args[3])) + | ((temps[args[2]].val & tmp) << args[3]); + tcg_opt_gen_movi(gen_args, args[0], tmp); + gen_args += 2; + } else { + reset_temp(args[0]); + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args[2] = args[2]; + gen_args[3] = args[3]; + gen_args[4] = args[4]; + gen_args += 5; + } + args += 5; + break; + CASE_OP_32_64(setcond): + tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); + if (tmp != 2) { + gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -595,10 +705,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args += 4; break; CASE_OP_32_64(brcond): - if (temps[args[0]].state == TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST) { - if (do_constant_folding_cond(op, temps[args[0]].val, - temps[args[1]].val, args[2])) { + tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); + if (tmp != 2) { + if (tmp) { memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); gen_opc_buf[op_index] = INDEX_op_br; gen_args[0] = args[3]; @@ -608,7 +717,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } } else { memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -617,15 +726,41 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } args += 4; break; + CASE_OP_32_64(movcond): + tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); + if (tmp != 2) { + if (temps_are_copies(args[0], args[4-tmp])) { + gen_opc_buf[op_index] = INDEX_op_nop; + } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { + gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); + gen_args += 2; + } else { + gen_opc_buf[op_index] = op_to_mov(op); + tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); + gen_args += 2; + } + } else { + reset_temp(args[0]); + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args[2] = args[2]; + gen_args[3] = args[3]; + gen_args[4] = args[4]; + gen_args[5] = args[5]; + gen_args += 6; + } + args += 6; + break; case INDEX_op_call: nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { for (i = 0; i < nb_globals; i++) { - reset_temp(i, nb_temps, nb_globals); + reset_temp(i); } } for (i = 0; i < (args[0] >> 16); i++) { - reset_temp(args[i + 1], nb_temps, nb_globals); + reset_temp(args[i + 1]); } i = nb_call_args + 3; while (i) { @@ -635,21 +770,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, i--; } break; - case INDEX_op_set_label: - case INDEX_op_jmp: - case INDEX_op_br: - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); - for (i = 0; i < def->nb_args; i++) { - *gen_args = *args; - args++; - gen_args++; - } - break; default: /* Default case: we do know nothing about operation so no - propagation is done. We only trash output args. */ - for (i = 0; i < def->nb_oargs; i++) { - reset_temp(args[i], nb_temps, nb_globals); + propagation is done. We trash everything if the operation + is the end of a basic block, otherwise we only trash the + output args. */ + if (def->flags & TCG_OPF_BB_END) { + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + } else { + for (i = 0; i < def->nb_oargs; i++) { + reset_temp(args[i]); + } } for (i = 0; i < def->nb_args; i++) { gen_args[i] = args[i]; diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 26c4b33e60..90c275d698 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -221,12 +221,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static int tcg_target_get_call_iarg_regs_count(int flags) -{ - return ARRAY_SIZE (tcg_target_call_iarg_regs); -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -390,6 +384,7 @@ static int tcg_target_const_match(tcg_target_long val, #define ORC XO31(412) #define EQV XO31(284) #define NAND XO31(476) +#define ISEL XO31( 15) #define LBZX XO31( 87) #define LHZX XO31(279) @@ -1269,6 +1264,72 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args, ); } +static void tcg_out_movcond (TCGContext *s, TCGCond cond, + TCGArg dest, + TCGArg c1, TCGArg c2, + TCGArg v1, TCGArg v2, + int const_c2) +{ + tcg_out_cmp (s, cond, c1, c2, const_c2, 7); + + if (1) { + /* At least here on 7747A bit twiddling hacks are outperformed + by jumpy code (the testing was not scientific) */ + if (dest == v2) { + cond = tcg_invert_cond (cond); + v2 = v1; + } + else { + if (dest != v1) { + tcg_out_mov (s, TCG_TYPE_I32, dest, v1); + } + } + /* Branch forward over one insn */ + tcg_out32 (s, tcg_to_bc[cond] | 8); + tcg_out_mov (s, TCG_TYPE_I32, dest, v2); + } + else { + /* isel version, "if (1)" above should be replaced once a way + to figure out availability of isel on the underlying + hardware is found */ + int tab, bc; + + switch (cond) { + case TCG_COND_EQ: + tab = TAB (dest, v1, v2); + bc = CR_EQ; + break; + case TCG_COND_NE: + tab = TAB (dest, v2, v1); + bc = CR_EQ; + break; + case TCG_COND_LTU: + case TCG_COND_LT: + tab = TAB (dest, v1, v2); + bc = CR_LT; + break; + case TCG_COND_GEU: + case TCG_COND_GE: + tab = TAB (dest, v2, v1); + bc = CR_LT; + break; + case TCG_COND_LEU: + case TCG_COND_LE: + tab = TAB (dest, v2, v1); + bc = CR_GT; + break; + case TCG_COND_GTU: + case TCG_COND_GT: + tab = TAB (dest, v1, v2); + bc = CR_GT; + break; + default: + tcg_abort (); + } + tcg_out32 (s, ISEL | tab | ((bc + 28) << 6)); + } +} + static void tcg_out_brcond (TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index) @@ -1826,6 +1887,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, ); break; + case INDEX_op_movcond_i32: + tcg_out_movcond (s, args[5], args[0], + args[1], args[2], + args[3], args[4], + const_args[2]); + break; + default: tcg_dump_ops (s); tcg_abort (); @@ -1922,6 +1990,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext16u_i32, { "r", "r" } }, { INDEX_op_deposit_i32, { "r", "0", "r" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } }, { -1 }, }; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 2f37fd289b..3259d898ab 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -92,6 +92,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 337cd419f8..19944bc427 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -208,12 +208,6 @@ static void patch_reloc (uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static int tcg_target_get_call_iarg_regs_count (int flags) -{ - return ARRAY_SIZE (tcg_target_call_iarg_regs); -} - /* parse target specific constraints */ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) { diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 97eec0843d..57569e8938 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -83,6 +83,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rot_i64 0 @@ -103,6 +104,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index aac11d9b61..3b90605fb1 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -356,11 +356,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -static int tcg_target_get_call_iarg_regs_count(int flags) -{ - return sizeof(tcg_target_call_iarg_regs) / sizeof(int); -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 4f7dfaba50..ed55c331c6 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -63,6 +63,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -84,6 +85,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 #endif #define TCG_TARGET_HAS_GUEST_BASE diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index baed3b49ff..8fd7f86de0 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -59,7 +59,15 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -#define ARG_OFFSET 1 +/* Define some temporary registers. T2 is used for constant generation. */ +#define TCG_REG_T1 TCG_REG_G1 +#define TCG_REG_T2 TCG_REG_O7 + +#ifdef CONFIG_USE_GUEST_BASE +# define TCG_GUEST_BASE_REG TCG_REG_I5 +#else +# define TCG_GUEST_BASE_REG TCG_REG_G0 +#endif static const int tcg_target_reg_alloc_order[] = { TCG_REG_L0, @@ -70,11 +78,25 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_L5, TCG_REG_L6, TCG_REG_L7, + TCG_REG_I0, TCG_REG_I1, TCG_REG_I2, TCG_REG_I3, TCG_REG_I4, + TCG_REG_I5, + + TCG_REG_G2, + TCG_REG_G3, + TCG_REG_G4, + TCG_REG_G5, + + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, }; static const int tcg_target_call_iarg_regs[6] = { @@ -133,12 +155,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 6; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -157,7 +173,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3); break; case 'I': ct->ct |= TCG_CT_CONST_S11; @@ -236,7 +251,7 @@ static inline int tcg_target_const_match(tcg_target_long val, #define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) #define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) -#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x10)) +#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08)) #define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c)) #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) @@ -288,6 +303,16 @@ static inline int tcg_target_const_match(tcg_target_long val, #define ASI_PRIMARY_LITTLE 0x88 #endif +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, int op) { @@ -353,71 +378,50 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_sethi(s, ret, ~arg); tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); } else { - tcg_out_movi_imm32(s, TCG_REG_I4, arg >> (TCG_TARGET_REG_BITS / 2)); - tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX); - tcg_out_movi_imm32(s, ret, arg); - tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR); + tcg_out_movi_imm32(s, ret, arg >> (TCG_TARGET_REG_BITS / 2)); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_movi_imm32(s, TCG_REG_T2, arg); + tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); } } -static inline void tcg_out_ld_raw(TCGContext *s, int ret, - tcg_target_long arg) -{ - tcg_out_sethi(s, ret, arg); - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); -} - -static inline void tcg_out_ld_ptr(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1, + int a2, int op) { - if (!check_fit_tl(arg, 10)) - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL); - if (TCG_TARGET_REG_BITS == 64) { - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); - } else { - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); - } + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); } -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op) +static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, + int offset, int op) { - if (check_fit_tl(offset, 13)) + if (check_fit_tl(offset, 13)) { tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | INSN_IMM13(offset)); - else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | - INSN_RS2(addr)); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); + tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); } } -static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr, - int offset, int op, int asi) -{ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | - INSN_ASI(asi) | INSN_RS2(addr)); -} - static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst(s, ret, arg1, arg2, LDUW); - else - tcg_out_ldst(s, ret, arg1, arg2, LDX); + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst(s, arg, arg1, arg2, STW); - else - tcg_out_ldst(s, arg, arg1, arg2, STX); + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); +} + +static inline void tcg_out_ld_ptr(TCGContext *s, int ret, + tcg_target_long arg) +{ + if (!check_fit_tl(arg, 10)) { + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + } + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); } static inline void tcg_out_sety(TCGContext *s, int rs) @@ -436,20 +440,21 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) if (check_fit_tl(val, 13)) tcg_out_arithi(s, reg, reg, val, ARITH_ADD); else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, val); - tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_ADD); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val); + tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD); } } } -static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val) +static inline void tcg_out_andi(TCGContext *s, int rd, int rs, + tcg_target_long val) { if (val != 0) { if (check_fit_tl(val, 13)) - tcg_out_arithi(s, reg, reg, val, ARITH_AND); + tcg_out_arithi(s, rd, rs, val, ARITH_AND); else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val); - tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T1, val); + tcg_out_arith(s, rd, rs, TCG_REG_T1, ARITH_AND); } } } @@ -461,8 +466,8 @@ static void tcg_out_div32(TCGContext *s, int rd, int rs1, if (uns) { tcg_out_sety(s, TCG_REG_G0); } else { - tcg_out_arithi(s, TCG_REG_I5, rs1, 31, SHIFT_SRA); - tcg_out_sety(s, TCG_REG_I5); + tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); + tcg_out_sety(s, TCG_REG_T1); } tcg_out_arithc(s, rd, rs1, val2, val2const, @@ -477,30 +482,33 @@ static inline void tcg_out_nop(TCGContext *s) static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index) { TCGLabel *l = &s->labels[label_index]; + uint32_t off22; if (l->has_value) { - tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) - | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr))); + off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr); } else { + /* Make sure to preserve destinations during retranslation. */ + off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1); tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0); - tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0)); } + tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22); } #if TCG_TARGET_REG_BITS == 64 static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index) { TCGLabel *l = &s->labels[label_index]; + uint32_t off19; if (l->has_value) { - tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) | - (0x5 << 19) | - INSN_OFF19(l->u.value - (unsigned long)s->code_ptr))); + off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr); } else { + /* Make sure to preserve destinations during retranslation. */ + off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1); tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0); - tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) | - (0x5 << 19) | 0)); } + tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) | + (0x5 << 19) | off19)); } #endif @@ -608,8 +616,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, case TCG_COND_GTU: case TCG_COND_GEU: if (c2const && c2 != 0) { - tcg_out_movi_imm13(s, TCG_REG_I5, c2); - c2 = TCG_REG_I5; + tcg_out_movi_imm13(s, TCG_REG_T1, c2); + c2 = TCG_REG_T1; } t = c1, c1 = c2, c2 = t, c2const = 0; cond = tcg_swap_cond(cond); @@ -621,18 +629,10 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, default: tcg_out_cmp(s, c1, c2, c2const); -#if defined(__sparc_v9__) || defined(__sparc_v8plus__) - tcg_out_movi_imm13(s, ret, 0); - tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) - | MOVCC_ICC | INSN_IMM11(1)); -#else - t = gen_new_label(); - tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t); - tcg_out_movi_imm13(s, ret, 1); tcg_out_movi_imm13(s, ret, 0); - tcg_out_label(s, t, s->code_ptr); -#endif + tcg_out32(s, ARITH_MOVCC | INSN_RD(ret) + | INSN_RS1(tcg_cond_to_bcond[cond]) + | MOVCC_ICC | INSN_IMM11(1)); return; } @@ -664,15 +664,15 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, switch (cond) { case TCG_COND_EQ: - tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_I5, al, bl, blconst); + tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst); tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst); - tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_AND); + tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND); break; case TCG_COND_NE: - tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_I5, al, al, blconst); + tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, al, blconst); tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst); - tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_OR); + tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR); break; default: @@ -695,14 +695,36 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, /* Generate global QEMU prologue and epilogue code */ static void tcg_target_qemu_prologue(TCGContext *s) { - tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_CALL_STACK_OFFSET, - CPU_TEMP_BUF_NLONGS * (int)sizeof(long)); + int tmp_buf_size, frame_size; + + /* The TCG temp buffer is at the top of the frame, immediately + below the frame pointer. */ + tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); + tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, + tmp_buf_size); + + /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is + otherwise the minimal frame usable by callees. */ + frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; + frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; + frame_size += TCG_TARGET_STACK_ALIGN - 1; + frame_size &= -TCG_TARGET_STACK_ALIGN; tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | - INSN_IMM13(-(TCG_TARGET_STACK_MINFRAME + - CPU_TEMP_BUF_NLONGS * (int)sizeof(long)))); + INSN_IMM13(-frame_size)); + +#ifdef CONFIG_USE_GUEST_BASE + if (GUEST_BASE != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I1) | INSN_RS2(TCG_REG_G0)); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_I0); + /* delay slot */ + tcg_out_nop(s); + + /* No epilogue required. We issue ret + restore directly in the TB. */ } #if defined(CONFIG_SOFTMMU) @@ -726,418 +748,309 @@ static const void * const qemu_st_helpers[4] = { helper_stl_mmu, helper_stq_mmu, }; -#endif -#if TARGET_LONG_BITS == 32 -#define TARGET_LD_OP LDUW -#else -#define TARGET_LD_OP LDX -#endif +/* Perform the TLB load and compare. -#if defined(CONFIG_SOFTMMU) -#if HOST_LONG_BITS == 32 -#define TARGET_ADDEND_LD_OP LDUW + Inputs: + ADDRLO_IDX contains the index into ARGS of the low part of the + address; the high part of the address is at ADDR_LOW_IDX+1. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + The result of the TLB comparison is in %[ix]cc. The sanitized address + is in the returned register, maybe %o0. The TLB addend is in %o1. */ + +static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, + int s_bits, const TCGArg *args, int which) +{ + const int addrlo = args[addrlo_idx]; + const int r0 = TCG_REG_O0; + const int r1 = TCG_REG_O1; + const int r2 = TCG_REG_O2; + int addr = addrlo; + int tlb_ofs; + + if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) { + /* Assemble the 64-bit address in R0. */ + tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); + tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX); + tcg_out_arith(s, r0, r0, r1, ARITH_OR); + } + + /* Shift the page number down to tlb-entry. */ + tcg_out_arithi(s, r1, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL); + + /* Mask out the page offset, except for the required alignment. */ + tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + + /* Compute tlb index, modulo tlb size. */ + tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + + /* Relative to the current ENV. */ + tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD); + + /* Find a base address that can load both tlb comparator and addend. */ + tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); + if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) { + tcg_out_addi(s, r1, tlb_ofs); + tlb_ofs = 0; + } + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); + + /* subcc arg0, arg2, %g0 */ + tcg_out_cmp(s, r0, r2, 0); + + /* If the guest address must be zero-extended, do so now. */ + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); + return r0; + } + return addrlo; +} +#endif /* CONFIG_SOFTMMU */ + +static const int qemu_ld_opc[8] = { +#ifdef TARGET_WORDS_BIGENDIAN + LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX #else -#define TARGET_ADDEND_LD_OP LDX -#endif + LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE #endif +}; -#ifdef __arch64__ -#define HOST_LD_OP LDX -#define HOST_ST_OP STX -#define HOST_SLL_OP SHIFT_SLLX -#define HOST_SRA_OP SHIFT_SRAX +static const int qemu_st_opc[4] = { +#ifdef TARGET_WORDS_BIGENDIAN + STB, STH, STW, STX #else -#define HOST_LD_OP LDUW -#define HOST_ST_OP STW -#define HOST_SLL_OP SHIFT_SLL -#define HOST_SRA_OP SHIFT_SRA + STB, STH_LE, STW_LE, STX_LE #endif +}; -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) { - int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits; + int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) - uint32_t *label1_ptr, *label2_ptr; + int memi_idx, memi, s_bits, n; + uint32_t *label_ptr[2]; #endif - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; - s_bits = opc & 3; - - arg0 = TCG_REG_O0; - arg1 = TCG_REG_O1; - arg2 = TCG_REG_O2; + datahi = datalo = args[0]; + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + datahi = args[1]; + addrlo_idx = 2; + } #if defined(CONFIG_SOFTMMU) - /* srl addr_reg, x, arg1 */ - tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, - SHIFT_SRL); - /* and addr_reg, x, arg0 */ - tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1), - ARITH_AND); - - /* and arg1, x, arg1 */ - tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); + memi = args[memi_idx]; + s_bits = sizeop & 3; + + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args, + offsetof(CPUTLBEntry, addr_read)); + + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + int reg64; + + /* bne,pn %[xi]cc, label0 */ + label_ptr[0] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21))); + + /* TLB Hit. */ + /* Load all 64-bits into an O/G register. */ + reg64 = (datalo < 16 ? datalo : TCG_REG_O0); + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); + + /* Move the two 32-bit pieces into the destination registers. */ + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); + if (reg64 != datalo) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); + } - /* add arg1, x, arg1 */ - tcg_out_addi(s, arg1, offsetof(CPUArchState, - tlb_table[mem_index][0].addr_read)); + /* b,a,pt label1 */ + label_ptr[1] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) + | (1 << 29) | (1 << 19))); + } else { + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ + + /* beq,a,pt %[xi]cc, label0 */ + label_ptr[0] = NULL; + label_ptr[1] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21) + | (1 << 29) | (1 << 19))); + /* delay slot */ + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); + } - /* add env, arg1, arg1 */ - tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD); + /* TLB Miss. */ - /* ld [arg1], arg2 */ - tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) | - INSN_RS2(TCG_REG_G0)); + if (label_ptr[0]) { + *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr[0]); + } + n = 0; + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], + args[addrlo_idx + 1]); + } + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], + args[addrlo_idx]); - /* subcc arg0, arg2, %g0 */ - tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC); - - /* will become: - be label1 - or - be,pt %xcc label1 */ - label1_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, 0); - - /* mov (delay slot) */ - tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg); - - /* mov */ - tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index); - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], - TCG_AREG0); - - /* XXX: move that code at the end of the TB */ /* qemu_ld_helper[s_bits](arg0, arg1) */ tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits] - (tcg_target_ulong)s->code_ptr) >> 2) & 0x3fffffff)); - /* Store AREG0 in stack to avoid ugly glibc bugs that mangle - global registers */ - // delay slot - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - - sizeof(long), HOST_ST_OP); - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - - sizeof(long), HOST_LD_OP); - - /* data_reg = sign_extend(arg0) */ - switch(opc) { + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi); + + n = tcg_target_call_oarg_regs[0]; + /* datalo = sign_extend(arg0) */ + switch (sizeop) { case 0 | 4: - /* sll arg0, 24/56, data_reg */ - tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8, - HOST_SLL_OP); - /* sra data_reg, 24/56, data_reg */ - tcg_out_arithi(s, data_reg, data_reg, - (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP); + /* Recall that SRA sign extends from bit 31 through bit 63. */ + tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL); + tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA); break; case 1 | 4: - /* sll arg0, 16/48, data_reg */ - tcg_out_arithi(s, data_reg, arg0, - (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP); - /* sra data_reg, 16/48, data_reg */ - tcg_out_arithi(s, data_reg, data_reg, - (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP); + tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL); + tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA); break; case 2 | 4: - /* sll arg0, 32, data_reg */ - tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP); - /* sra data_reg, 32, data_reg */ - tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP); + tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA); break; + case 3: + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_REG, datahi, n); + tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1); + break; + } + /* FALLTHRU */ case 0: case 1: case 2: - case 3: default: /* mov */ - tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0); + tcg_out_mov(s, TCG_TYPE_REG, datalo, n); break; } - /* will become: - ba label2 */ - label2_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, 0); - - /* nop (delay slot */ - tcg_out_nop(s); - - /* label1: */ -#if TARGET_LONG_BITS == 32 - /* be label1 */ - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) | - INSN_OFF22((unsigned long)s->code_ptr - - (unsigned long)label1_ptr)); -#else - /* be,pt %xcc label1 */ - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) | - (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label1_ptr)); -#endif - - /* ld [arg1 + x], arg1 */ - tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP); - -#if TARGET_LONG_BITS == 32 - /* and addr_reg, x, arg0 */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff); - tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND); - /* add arg0, arg1, arg0 */ - tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD); + *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr[1]); #else - /* add addr_reg, arg1, arg0 */ - tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD); -#endif + addr_reg = args[addrlo_idx]; + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL); + addr_reg = TCG_REG_T1; + } + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + int reg64 = (datalo < 16 ? datalo : TCG_REG_O0); -#else - arg0 = addr_reg; -#endif + tcg_out_ldst_rr(s, reg64, addr_reg, + (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[sizeop]); - switch(opc) { - case 0: - /* ldub [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDUB); - break; - case 0 | 4: - /* ldsb [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDSB); - break; - case 1: -#ifdef TARGET_WORDS_BIGENDIAN - /* lduh [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDUH); -#else - /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE); -#endif - break; - case 1 | 4: -#ifdef TARGET_WORDS_BIGENDIAN - /* ldsh [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDSH); -#else - /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE); -#endif - break; - case 2: -#ifdef TARGET_WORDS_BIGENDIAN - /* lduw [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDUW); -#else - /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE); -#endif - break; - case 2 | 4: -#ifdef TARGET_WORDS_BIGENDIAN - /* ldsw [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDSW); -#else - /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE); -#endif - break; - case 3: -#ifdef TARGET_WORDS_BIGENDIAN - /* ldx [arg0], data_reg */ - tcg_out_ldst(s, data_reg, arg0, 0, LDX); -#else - /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE); -#endif - break; - default: - tcg_abort(); + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); + if (reg64 != datalo) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); + } + } else { + tcg_out_ldst_rr(s, datalo, addr_reg, + (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[sizeop]); } - -#if defined(CONFIG_SOFTMMU) - /* label2: */ - *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) | - INSN_OFF22((unsigned long)s->code_ptr - - (unsigned long)label2_ptr)); -#endif +#endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) { - int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits; + int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) - uint32_t *label1_ptr, *label2_ptr; + int memi_idx, memi, n; + uint32_t *label_ptr; #endif - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; - - s_bits = opc; - - arg0 = TCG_REG_O0; - arg1 = TCG_REG_O1; - arg2 = TCG_REG_O2; + datahi = datalo = args[0]; + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + datahi = args[1]; + addrlo_idx = 2; + } #if defined(CONFIG_SOFTMMU) - /* srl addr_reg, x, arg1 */ - tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, - SHIFT_SRL); - - /* and addr_reg, x, arg0 */ - tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1), - ARITH_AND); - - /* and arg1, x, arg1 */ - tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - - /* add arg1, x, arg1 */ - tcg_out_addi(s, arg1, offsetof(CPUArchState, - tlb_table[mem_index][0].addr_write)); - - /* add env, arg1, arg1 */ - tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD); + memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); + memi = args[memi_idx]; + + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args, + offsetof(CPUTLBEntry, addr_write)); + + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + /* Reconstruct the full 64-bit value. */ + tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); + tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); + datalo = TCG_REG_O2; + } - /* ld [arg1], arg2 */ - tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) | - INSN_RS2(TCG_REG_G0)); + /* The fast path is exactly one insn. Thus we can perform the entire + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21) + | (1 << 29) | (1 << 19))); + /* delay slot */ + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]); + + /* TLB Miss. */ + + n = 0; + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], + args[addrlo_idx + 1]); + } + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], + args[addrlo_idx]); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi); + } + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo); - /* subcc arg0, arg2, %g0 */ - tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC); - - /* will become: - be label1 - or - be,pt %xcc label1 */ - label1_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, 0); - - /* mov (delay slot) */ - tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg); - - /* mov */ - tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg); - - /* mov */ - tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); - - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], - TCG_AREG0); - /* XXX: move that code at the end of the TB */ /* qemu_st_helper[s_bits](arg0, arg1, arg2) */ - tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits] + tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop] - (tcg_target_ulong)s->code_ptr) >> 2) & 0x3fffffff)); - /* Store AREG0 in stack to avoid ugly glibc bugs that mangle - global registers */ - // delay slot - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - - sizeof(long), HOST_ST_OP); - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - - sizeof(long), HOST_LD_OP); - - /* will become: - ba label2 */ - label2_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, 0); - - /* nop (delay slot) */ - tcg_out_nop(s); - -#if TARGET_LONG_BITS == 32 - /* be label1 */ - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) | - INSN_OFF22((unsigned long)s->code_ptr - - (unsigned long)label1_ptr)); -#else - /* be,pt %xcc label1 */ - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) | - (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label1_ptr)); -#endif - - /* ld [arg1 + x], arg1 */ - tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP); - -#if TARGET_LONG_BITS == 32 - /* and addr_reg, x, arg0 */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff); - tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND); - /* add arg0, arg1, arg0 */ - tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD); -#else - /* add addr_reg, arg1, arg0 */ - tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD); -#endif + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi); + *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr); #else - arg0 = addr_reg; -#endif - - switch(opc) { - case 0: - /* stb data_reg, [arg0] */ - tcg_out_ldst(s, data_reg, arg0, 0, STB); - break; - case 1: -#ifdef TARGET_WORDS_BIGENDIAN - /* sth data_reg, [arg0] */ - tcg_out_ldst(s, data_reg, arg0, 0, STH); -#else - /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE); -#endif - break; - case 2: -#ifdef TARGET_WORDS_BIGENDIAN - /* stw data_reg, [arg0] */ - tcg_out_ldst(s, data_reg, arg0, 0, STW); -#else - /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE); -#endif - break; - case 3: -#ifdef TARGET_WORDS_BIGENDIAN - /* stx data_reg, [arg0] */ - tcg_out_ldst(s, data_reg, arg0, 0, STX); -#else - /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE); -#endif - break; - default: - tcg_abort(); + addr_reg = args[addrlo_idx]; + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL); + addr_reg = TCG_REG_T1; } - -#if defined(CONFIG_SOFTMMU) - /* label2: */ - *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) | - INSN_OFF22((unsigned long)s->code_ptr - - (unsigned long)label2_ptr)); -#endif + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); + tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); + datalo = TCG_REG_O2; + } + tcg_out_ldst_rr(s, datalo, addr_reg, + (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_st_opc[sizeop]); +#endif /* CONFIG_SOFTMMU */ } static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, @@ -1156,39 +1069,33 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ - tcg_out_sethi(s, TCG_REG_I5, args[0] & 0xffffe000); - tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) | - INSN_IMM13((args[0] & 0x1fff))); + uint32_t old_insn = *(uint32_t *)s->code_ptr; s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + /* Make sure to preserve links during retranslation. */ + tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1))); } else { /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_I5, (tcg_target_long)(s->tb_next + args[0])); - tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) | + tcg_out_ld_ptr(s, TCG_REG_T1, + (tcg_target_long)(s->tb_next + args[0])); + tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) | INSN_RS2(TCG_REG_G0)); } tcg_out_nop(s); s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; break; case INDEX_op_call: - if (const_args[0]) + if (const_args[0]) { tcg_out32(s, CALL | ((((tcg_target_ulong)args[0] - (tcg_target_ulong)s->code_ptr) >> 2) & 0x3fffffff)); - else { - tcg_out_ld_ptr(s, TCG_REG_I5, + } else { + tcg_out_ld_ptr(s, TCG_REG_T1, (tcg_target_long)(s->tb_next + args[0])); - tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_I5) | + tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_T1) | INSN_RS2(TCG_REG_G0)); } - /* Store AREG0 in stack to avoid ugly glibc bugs that mangle - global registers */ - // delay slot - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - - sizeof(long), HOST_ST_OP); - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - - sizeof(long), HOST_LD_OP); + /* delay slot */ + tcg_out_nop(s); break; case INDEX_op_jmp: case INDEX_op_br: @@ -1260,13 +1167,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, goto gen_arith; case INDEX_op_shl_i32: c = SHIFT_SLL; - goto gen_arith; + do_shift32: + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c); + break; case INDEX_op_shr_i32: c = SHIFT_SRL; - goto gen_arith; + goto do_shift32; case INDEX_op_sar_i32: c = SHIFT_SRA; - goto gen_arith; + goto do_shift32; case INDEX_op_mul_i32: c = ARITH_UMUL; goto gen_arith; @@ -1287,11 +1197,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_rem_i32: case INDEX_op_remu_i32: - tcg_out_div32(s, TCG_REG_I5, args[1], args[2], const_args[2], + tcg_out_div32(s, TCG_REG_T1, args[1], args[2], const_args[2], opc == INDEX_op_remu_i32); - tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2], + tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2], ARITH_UMUL); - tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB); + tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB); break; case INDEX_op_brcond_i32: @@ -1356,6 +1266,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_qemu_ld(s, args, 2 | 4); break; #endif + case INDEX_op_qemu_ld64: + tcg_out_qemu_ld(s, args, 3); + break; case INDEX_op_qemu_st8: tcg_out_qemu_st(s, args, 0); break; @@ -1365,6 +1278,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_qemu_st32: tcg_out_qemu_st(s, args, 2); break; + case INDEX_op_qemu_st64: + tcg_out_qemu_st(s, args, 3); + break; #if TCG_TARGET_REG_BITS == 64 case INDEX_op_movi_i64: @@ -1381,13 +1297,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_shl_i64: c = SHIFT_SLLX; - goto gen_arith; + do_shift64: + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c); + break; case INDEX_op_shr_i64: c = SHIFT_SRLX; - goto gen_arith; + goto do_shift64; case INDEX_op_sar_i64: c = SHIFT_SRAX; - goto gen_arith; + goto do_shift64; case INDEX_op_mul_i64: c = ARITH_MULX; goto gen_arith; @@ -1399,11 +1318,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, goto gen_arith; case INDEX_op_rem_i64: case INDEX_op_remu_i64: - tcg_out_arithc(s, TCG_REG_I5, args[1], args[2], const_args[2], + tcg_out_arithc(s, TCG_REG_T1, args[1], args[2], const_args[2], opc == INDEX_op_rem_i64 ? ARITH_SDIVX : ARITH_UDIVX); - tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2], + tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2], ARITH_MULX); - tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB); + tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB); break; case INDEX_op_ext32s_i64: if (const_args[1]) { @@ -1429,13 +1348,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[2], const_args[2]); break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); - break; - #endif gen_arith: tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c); @@ -1500,20 +1412,6 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } }, #endif - { INDEX_op_qemu_ld8u, { "r", "L" } }, - { INDEX_op_qemu_ld8s, { "r", "L" } }, - { INDEX_op_qemu_ld16u, { "r", "L" } }, - { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32, { "r", "L" } }, -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld32u, { "r", "L" } }, - { INDEX_op_qemu_ld32s, { "r", "L" } }, -#endif - - { INDEX_op_qemu_st8, { "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L" } }, - #if TCG_TARGET_REG_BITS == 64 { INDEX_op_mov_i64, { "r", "r" } }, { INDEX_op_movi_i64, { "r" } }, @@ -1528,8 +1426,6 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_st16_i64, { "r", "r" } }, { INDEX_op_st32_i64, { "r", "r" } }, { INDEX_op_st_i64, { "r", "r" } }, - { INDEX_op_qemu_ld64, { "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L" } }, { INDEX_op_add_i64, { "r", "r", "rJ" } }, { INDEX_op_mul_i64, { "r", "r", "rJ" } }, @@ -1557,6 +1453,47 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_brcond_i64, { "r", "rJ" } }, { INDEX_op_setcond_i64, { "r", "r", "rJ" } }, #endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L", "L" } }, +#else + { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L", "L" } }, + { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L", "L", "L" } }, +#endif + { -1 }, }; @@ -1583,25 +1520,23 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_O7)); tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); -#if TCG_TARGET_REG_BITS == 64 - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I4); // for internal use -#endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I5); // for internal use - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_O7); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ + tcg_add_target_add_op_defs(sparc_op_defs); } #if TCG_TARGET_REG_BITS == 64 # define ELF_HOST_MACHINE EM_SPARCV9 -#elif defined(__sparc_v8plus__) +#else # define ELF_HOST_MACHINE EM_SPARC32PLUS # define ELF_HOST_FLAGS EF_SPARC_32PLUS -#else -# define ELF_HOST_MACHINE EM_SPARC #endif typedef struct { @@ -1657,3 +1592,18 @@ void tcg_register_jit(void *buf, size_t buf_size) tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + tcg_target_long disp = (tcg_target_long)(addr - jmp_addr) >> 2; + + /* We can reach the entire address space for 32-bit. For 64-bit + the code_gen_buffer can't be larger than 2GB. */ + if (TCG_TARGET_REG_BITS == 64 && !check_fit_tl(disp, 30)) { + tcg_abort(); + } + + *ptr = CALL | (disp & 0x3fffffff); + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 0ea87bef72..6314ffb303 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -66,22 +66,19 @@ typedef enum { #define TCG_CT_CONST_S13 0x200 /* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_I6 -#ifdef __arch64__ -// Reserve space for AREG0 -#define TCG_TARGET_STACK_MINFRAME (176 + 4 * (int)sizeof(long) + \ - TCG_STATIC_CALL_ARGS_SIZE) -#define TCG_TARGET_CALL_STACK_OFFSET (2047 - 16) -#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_REG_CALL_STACK TCG_REG_O6 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_STACK_BIAS 2047 +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) #else -// AREG0 + one word for alignment -#define TCG_TARGET_STACK_MINFRAME (92 + (2 + 1) * (int)sizeof(long) + \ - TCG_STATIC_CALL_ARGS_SIZE) -#define TCG_TARGET_CALL_STACK_OFFSET TCG_TARGET_STACK_MINFRAME -#define TCG_TARGET_STACK_ALIGN 8 +#define TCG_TARGET_STACK_BIAS 0 +#define TCG_TARGET_STACK_ALIGN 8 +#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) #endif -#ifdef __arch64__ +#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_EXTEND_ARGS 1 #endif @@ -102,6 +99,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 1 @@ -123,15 +121,12 @@ typedef enum { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 #endif -#ifdef CONFIG_SOLARIS -#define TCG_AREG0 TCG_REG_G2 -#elif defined(__sparc_v9__) -#define TCG_AREG0 TCG_REG_G5 -#else -#define TCG_AREG0 TCG_REG_G6 -#endif +#define TCG_TARGET_HAS_GUEST_BASE + +#define TCG_AREG0 TCG_REG_I0 static inline void flush_icache_range(tcg_target_ulong start, tcg_target_ulong stop) diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 169d3b2b0d..6d28f82ad8 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -2118,6 +2118,44 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_temp_free_i64(t1); } +static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 c1, TCGv_i32 c2, + TCGv_i32 v1, TCGv_i32 v2) +{ + if (TCG_TARGET_HAS_movcond_i32) { + tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + tcg_gen_setcond_i32(cond, t0, c1, c2); + tcg_gen_neg_i32(t0, t0); + tcg_gen_and_i32(t1, v1, t0); + tcg_gen_andc_i32(ret, v2, t0); + tcg_gen_or_i32(ret, ret, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } +} + +static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 c1, TCGv_i64 c2, + TCGv_i64 v1, TCGv_i64 v2) +{ + if (TCG_TARGET_HAS_movcond_i64) { + tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_setcond_i64(cond, t0, c1, c2); + tcg_gen_neg_i64(t0, t0); + tcg_gen_and_i64(t1, v1, t0); + tcg_gen_andc_i64(ret, v2, t0); + tcg_gen_or_i64(ret, ret, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + /***************************************/ /* QEMU specific operations. Their type depend on the QEMU CPU type. */ @@ -2434,6 +2472,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_deposit_tl tcg_gen_deposit_i64 #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 +#define tcg_gen_movcond_tl tcg_gen_movcond_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -2505,6 +2544,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_deposit_tl tcg_gen_deposit_i32 #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 +#define tcg_gen_movcond_tl tcg_gen_movcond_i32 #endif #if TCG_TARGET_REG_BITS == 32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 8e06d03b17..dbb0e3916a 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -36,7 +36,7 @@ DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */ DEF(discard, 1, 0, 0, 0) -DEF(set_label, 0, 0, 1, 0) +DEF(set_label, 0, 0, 1, TCG_OPF_BB_END) DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */ DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) @@ -51,6 +51,7 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) DEF(mov_i32, 1, 1, 0, 0) DEF(movi_i32, 1, 0, 1, 0) DEF(setcond_i32, 1, 2, 1, 0) +DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32)) /* load/store */ DEF(ld8u_i32, 1, 1, 1, 0) DEF(ld8s_i32, 1, 1, 1, 0) @@ -107,6 +108,7 @@ DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) DEF(mov_i64, 1, 1, 0, IMPL64) DEF(movi_i64, 1, 0, 1, IMPL64) DEF(setcond_i64, 1, 2, 1, IMPL64) +DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64)) /* load/store */ DEF(ld8u_i64, 1, 1, 1, IMPL64) DEF(ld8s_i64, 1, 1, 1, IMPL64) diff --git a/tcg/tcg.c b/tcg/tcg.c index a4e7f42c78..b3c265013d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -89,7 +89,6 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, tcg_target_long arg2); static int tcg_target_const_match(tcg_target_long val, const TCGArgConstraint *arg_ct); -static int tcg_target_get_call_iarg_regs_count(int flags); TCGOpDef tcg_op_defs[] = { #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, @@ -937,11 +936,7 @@ void tcg_dump_ops(TCGContext *s) args[nb_oargs + i])); } } - } else if (c == INDEX_op_movi_i32 -#if TCG_TARGET_REG_BITS == 64 - || c == INDEX_op_movi_i64 -#endif - ) { + } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) { tcg_target_ulong val; TCGHelperInfo *th; @@ -991,17 +986,13 @@ void tcg_dump_ops(TCGContext *s) } switch (c) { case INDEX_op_brcond_i32: -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: -#elif TCG_TARGET_REG_BITS == 64 - case INDEX_op_brcond_i64: -#endif case INDEX_op_setcond_i32: -#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_movcond_i32: + case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: -#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_brcond_i64: case INDEX_op_setcond_i64: -#endif + case INDEX_op_movcond_i64: if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { qemu_log(",%s", cond_name[args[k++]]); } else { @@ -1297,11 +1288,6 @@ static void tcg_liveness_analysis(TCGContext *s) args--; } break; - case INDEX_op_set_label: - args--; - /* mark end of basic block */ - tcg_la_bb_end(s, dead_temps); - break; case INDEX_op_debug_insn_start: args -= def->nb_args; break; @@ -1463,7 +1449,8 @@ static void temp_allocate_frame(TCGContext *s, int temp) { TCGTemp *ts; ts = &s->temps[temp]; -#ifndef __sparc_v9__ /* Sparc64 stack is accessed with offset of 2047 */ +#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) + /* Sparc64 stack is accessed with offset of 2047 */ s->current_frame_offset = (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1); @@ -1866,7 +1853,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, flags = args[nb_oargs + nb_iargs]; - nb_regs = tcg_target_get_call_iarg_regs_count(flags); + nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); if (nb_regs > nb_params) nb_regs = nb_params; @@ -2108,16 +2095,12 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, #endif switch(opc) { case INDEX_op_mov_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_mov_i64: -#endif dead_args = s->op_dead_args[op_index]; tcg_reg_alloc_mov(s, def, args, dead_args); break; case INDEX_op_movi_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_movi_i64: -#endif tcg_reg_alloc_movi(s, args); break; case INDEX_op_debug_insn_start: diff --git a/tcg/tcg.h b/tcg/tcg.h index 7a72729f3a..48a56f0b15 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -79,6 +79,7 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 #endif #ifndef TCG_TARGET_deposit_i32_valid @@ -343,7 +344,7 @@ struct TCGContext { /* goto_tb support */ uint8_t *code_buf; - unsigned long *tb_next; + uintptr_t *tb_next; uint16_t *tb_next_offset; uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ @@ -459,11 +460,6 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void) void tcg_temp_free_i64(TCGv_i64 arg); char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg); -static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg) -{ - return s->temps[arg].temp_local; -} - #if defined(CONFIG_DEBUG_TCG) /* If you call tcg_clear_temp_count() at the start of a section of * code which is not supposed to leak any TCG temporaries, then diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 003244cb0b..3f4a24bb8b 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -863,12 +863,6 @@ static int tcg_target_const_match(tcg_target_long val, return arg_ct->ct & TCG_CT_CONST; } -/* Maximum number of register used for input function arguments. */ -static int tcg_target_get_call_iarg_regs_count(int flags) -{ - return ARRAY_SIZE(tcg_target_call_iarg_regs); -} - static void tcg_target_init(TCGContext *s) { #if defined(CONFIG_DEBUG_TCG_INTERPRETER) diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 30a0f21596..6d894953aa 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -75,6 +75,7 @@ #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_movcond_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_bswap16_i64 1 @@ -98,6 +99,7 @@ #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_movcond_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ /* Offset to user memory in user mode. */ diff --git a/trace-events b/trace-events index b25ae1c437..f5b5097552 100644 --- a/trace-events +++ b/trace-events @@ -243,9 +243,12 @@ usb_port_release(int bus, const char *port) "bus %d, port %s" # hw/usb/hcd-ehci.c usb_ehci_reset(void) "=== RESET ===" -usb_ehci_mmio_readl(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x" -usb_ehci_mmio_writel(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x" -usb_ehci_mmio_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)" +usb_ehci_opreg_read(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x" +usb_ehci_opreg_write(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x" +usb_ehci_opreg_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)" +usb_ehci_portsc_read(uint32_t addr, uint32_t port, uint32_t val) "rd mmio %04x [port %d] = %x" +usb_ehci_portsc_write(uint32_t addr, uint32_t port, uint32_t val) "wr mmio %04x [port %d] = %x" +usb_ehci_portsc_change(uint32_t addr, uint32_t port, uint32_t new, uint32_t old) "ch mmio %04x [port %d] = %x (old: %x)" usb_ehci_usbsts(const char *sts, int state) "usbsts %s %d" usb_ehci_state(const char *schedule, const char *state) "%s schedule %s" usb_ehci_qh_ptrs(void *q, uint32_t addr, uint32_t nxt, uint32_t c_qtd, uint32_t n_qtd, uint32_t a_qtd) "q %p - QH @ %08x: next %08x qtds %08x,%08x,%08x" @@ -932,8 +935,9 @@ qxl_interface_update_area_complete_rest(int qid, uint32_t num_updated_rects) "%d qxl_interface_update_area_complete_overflow(int qid, int max) "%d max=%d" qxl_interface_update_area_complete_schedule_bh(int qid, uint32_t num_dirty) "%d #dirty=%d" qxl_io_destroy_primary_ignored(int qid, const char *mode) "%d %s" +qxl_io_log(int qid, const uint8_t *log_buf) "%d %s" qxl_io_read_unexpected(int qid) "%d" -qxl_io_unexpected_vga_mode(int qid, uint32_t io_port, const char *desc) "%d 0x%x (%s)" +qxl_io_unexpected_vga_mode(int qid, uint64_t addr, uint64_t val, const char *desc) "%d 0x%"PRIx64"=%"PRIu64" (%s)" qxl_io_write(int qid, const char *mode, uint64_t addr, uint64_t val, unsigned size, int async) "%d %s addr=%"PRIu64 " val=%"PRIu64" size=%u async=%d" qxl_memslot_add_guest(int qid, uint32_t slot_id, uint64_t guest_start, uint64_t guest_end) "%d %u: guest phys 0x%"PRIx64 " - 0x%" PRIx64 qxl_post_load(int qid, const char *mode) "%d %s" @@ -964,7 +968,7 @@ qxl_spice_destroy_surfaces(int qid, int async) "%d async=%d" qxl_spice_destroy_surface_wait_complete(int qid, uint32_t id) "%d sid=%d" qxl_spice_destroy_surface_wait(int qid, uint32_t id, int async) "%d sid=%d async=%d" qxl_spice_flush_surfaces_async(int qid, uint32_t surface_count, uint32_t num_free_res) "%d s#=%d, res#=%d" -qxl_spice_monitors_config(int id) "%d" +qxl_spice_monitors_config(int qid) "%d" qxl_spice_loadvm_commands(int qid, void *ext, uint32_t count) "%d ext=%p count=%d" qxl_spice_oom(int qid) "%d" qxl_spice_reset_cursor(int qid) "%d" @@ -973,6 +977,12 @@ qxl_spice_reset_memslots(int qid) "%d" qxl_spice_update_area(int qid, uint32_t surface_id, uint32_t left, uint32_t right, uint32_t top, uint32_t bottom) "%d sid=%d [%d,%d,%d,%d]" qxl_spice_update_area_rest(int qid, uint32_t num_dirty_rects, uint32_t clear_dirty_region) "%d #d=%d clear=%d" qxl_surfaces_dirty(int qid, int surface, int offset, int size) "%d surface=%d offset=%d size=%d" +qxl_send_events(int qid, uint32_t events) "%d %d" +qxl_set_guest_bug(int qid) "%d" +qxl_interrupt_client_monitors_config(int qid, int num_heads, void *heads) "%d %d %p" +qxl_client_monitors_config_unsupported_by_guest(int qid, uint32_t int_mask, void *client_monitors_config) "%d %X %p" +qxl_client_monitors_config_capped(int qid, int requested, int limit) "%d %d %d" +qxl_client_monitors_config_crc(int qid, unsigned size, uint32_t crc32) "%d %u %u" # hw/qxl-render.c qxl_render_blit_guest_primary_initialized(void) "" diff --git a/ui/spice-display.c b/ui/spice-display.c index 99bc665bc7..50fbefb067 100644 --- a/ui/spice-display.c +++ b/ui/spice-display.c @@ -164,34 +164,31 @@ int qemu_spice_display_is_running(SimpleSpiceDisplay *ssd) #endif } -static SimpleSpiceUpdate *qemu_spice_create_update(SimpleSpiceDisplay *ssd) +static void qemu_spice_create_one_update(SimpleSpiceDisplay *ssd, + QXLRect *rect) { SimpleSpiceUpdate *update; QXLDrawable *drawable; QXLImage *image; QXLCommand *cmd; - uint8_t *src, *dst; - int by, bw, bh; + uint8_t *src, *mirror, *dst; + int by, bw, bh, offset, bytes; struct timespec time_space; - if (qemu_spice_rect_is_empty(&ssd->dirty)) { - return NULL; - }; - trace_qemu_spice_create_update( - ssd->dirty.left, ssd->dirty.right, - ssd->dirty.top, ssd->dirty.bottom); + rect->left, rect->right, + rect->top, rect->bottom); update = g_malloc0(sizeof(*update)); drawable = &update->drawable; image = &update->image; cmd = &update->ext.cmd; - bw = ssd->dirty.right - ssd->dirty.left; - bh = ssd->dirty.bottom - ssd->dirty.top; + bw = rect->right - rect->left; + bh = rect->bottom - rect->top; update->bitmap = g_malloc(bw * bh * 4); - drawable->bbox = ssd->dirty; + drawable->bbox = *rect; drawable->clip.type = SPICE_CLIP_TYPE_NONE; drawable->effect = QXL_EFFECT_OPAQUE; drawable->release_info.id = (uintptr_t)update; @@ -219,31 +216,103 @@ static SimpleSpiceUpdate *qemu_spice_create_update(SimpleSpiceDisplay *ssd) image->bitmap.palette = 0; image->bitmap.format = SPICE_BITMAP_FMT_32BIT; - if (ssd->conv == NULL) { - PixelFormat dst = qemu_default_pixelformat(32); - ssd->conv = qemu_pf_conv_get(&dst, &ssd->ds->surface->pf); - assert(ssd->conv); - } - - src = ds_get_data(ssd->ds) + - ssd->dirty.top * ds_get_linesize(ssd->ds) + - ssd->dirty.left * ds_get_bytes_per_pixel(ssd->ds); + offset = + rect->top * ds_get_linesize(ssd->ds) + + rect->left * ds_get_bytes_per_pixel(ssd->ds); + bytes = ds_get_bytes_per_pixel(ssd->ds) * bw; + src = ds_get_data(ssd->ds) + offset; + mirror = ssd->ds_mirror + offset; dst = update->bitmap; for (by = 0; by < bh; by++) { - qemu_pf_conv_run(ssd->conv, dst, src, bw); + memcpy(mirror, src, bytes); + qemu_pf_conv_run(ssd->conv, dst, mirror, bw); src += ds_get_linesize(ssd->ds); + mirror += ds_get_linesize(ssd->ds); dst += image->bitmap.stride; } cmd->type = QXL_CMD_DRAW; cmd->data = (uintptr_t)drawable; + QTAILQ_INSERT_TAIL(&ssd->updates, update, next); +} + +static void qemu_spice_create_update(SimpleSpiceDisplay *ssd) +{ + static const int blksize = 32; + int blocks = (ds_get_width(ssd->ds) + blksize - 1) / blksize; + int dirty_top[blocks]; + int y, yoff, x, xoff, blk, bw; + int bpp = ds_get_bytes_per_pixel(ssd->ds); + uint8_t *guest, *mirror; + + if (qemu_spice_rect_is_empty(&ssd->dirty)) { + return; + }; + + if (ssd->conv == NULL) { + PixelFormat dst = qemu_default_pixelformat(32); + ssd->conv = qemu_pf_conv_get(&dst, &ssd->ds->surface->pf); + assert(ssd->conv); + } + if (ssd->ds_mirror == NULL) { + int size = ds_get_height(ssd->ds) * ds_get_linesize(ssd->ds); + ssd->ds_mirror = g_malloc0(size); + } + + for (blk = 0; blk < blocks; blk++) { + dirty_top[blk] = -1; + } + + guest = ds_get_data(ssd->ds); + mirror = ssd->ds_mirror; + for (y = ssd->dirty.top; y < ssd->dirty.bottom; y++) { + yoff = y * ds_get_linesize(ssd->ds); + for (x = ssd->dirty.left; x < ssd->dirty.right; x += blksize) { + xoff = x * bpp; + blk = x / blksize; + bw = MIN(blksize, ssd->dirty.right - x); + if (memcmp(guest + yoff + xoff, + mirror + yoff + xoff, + bw * bpp) == 0) { + if (dirty_top[blk] != -1) { + QXLRect update = { + .top = dirty_top[blk], + .bottom = y, + .left = x, + .right = x + bw, + }; + qemu_spice_create_one_update(ssd, &update); + dirty_top[blk] = -1; + } + } else { + if (dirty_top[blk] == -1) { + dirty_top[blk] = y; + } + } + } + } + + for (x = ssd->dirty.left; x < ssd->dirty.right; x += blksize) { + blk = x / blksize; + bw = MIN(blksize, ssd->dirty.right - x); + if (dirty_top[blk] != -1) { + QXLRect update = { + .top = dirty_top[blk], + .bottom = ssd->dirty.bottom, + .left = x, + .right = x + bw, + }; + qemu_spice_create_one_update(ssd, &update); + dirty_top[blk] = -1; + } + } + memset(&ssd->dirty, 0, sizeof(ssd->dirty)); - return update; } /* - * Called from spice server thread context (via interface_release_ressource) + * Called from spice server thread context (via interface_release_resource) * We do *not* hold the global qemu mutex here, so extra care is needed * when calling qemu functions. QEMU interfaces used: * - g_free (underlying glibc free is re-entrant). @@ -315,6 +384,7 @@ void qemu_spice_display_init_common(SimpleSpiceDisplay *ssd, DisplayState *ds) { ssd->ds = ds; qemu_mutex_init(&ssd->lock); + QTAILQ_INIT(&ssd->updates); ssd->mouse_x = -1; ssd->mouse_y = -1; if (ssd->num_surfaces == 0) { @@ -345,16 +415,20 @@ void qemu_spice_display_update(SimpleSpiceDisplay *ssd, void qemu_spice_display_resize(SimpleSpiceDisplay *ssd) { + SimpleSpiceUpdate *update; + dprint(1, "%s:\n", __FUNCTION__); memset(&ssd->dirty, 0, sizeof(ssd->dirty)); qemu_pf_conv_put(ssd->conv); ssd->conv = NULL; + g_free(ssd->ds_mirror); + ssd->ds_mirror = NULL; qemu_mutex_lock(&ssd->lock); - if (ssd->update != NULL) { - qemu_spice_destroy_update(ssd, ssd->update); - ssd->update = NULL; + while ((update = QTAILQ_FIRST(&ssd->updates)) != NULL) { + QTAILQ_REMOVE(&ssd->updates, update, next); + qemu_spice_destroy_update(ssd, update); } qemu_mutex_unlock(&ssd->lock); qemu_spice_destroy_host_primary(ssd); @@ -384,8 +458,8 @@ void qemu_spice_display_refresh(SimpleSpiceDisplay *ssd) vga_hw_update(); qemu_mutex_lock(&ssd->lock); - if (ssd->update == NULL) { - ssd->update = qemu_spice_create_update(ssd); + if (QTAILQ_EMPTY(&ssd->updates)) { + qemu_spice_create_update(ssd); ssd->notify++; } qemu_spice_cursor_refresh_unlocked(ssd); @@ -442,9 +516,9 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext) dprint(3, "%s:\n", __FUNCTION__); qemu_mutex_lock(&ssd->lock); - if (ssd->update != NULL) { - update = ssd->update; - ssd->update = NULL; + update = QTAILQ_FIRST(&ssd->updates); + if (update != NULL) { + QTAILQ_REMOVE(&ssd->updates, update, next); *ext = update->ext; ret = true; } diff --git a/ui/spice-display.h b/ui/spice-display.h index 512ab7831b..dea41c1b71 100644 --- a/ui/spice-display.h +++ b/ui/spice-display.h @@ -72,6 +72,7 @@ typedef struct SimpleSpiceUpdate SimpleSpiceUpdate; struct SimpleSpiceDisplay { DisplayState *ds; + uint8_t *ds_mirror; void *buf; int bufsize; QXLWorker *worker; @@ -92,7 +93,7 @@ struct SimpleSpiceDisplay { * to them must be protected by the lock. */ QemuMutex lock; - SimpleSpiceUpdate *update; + QTAILQ_HEAD(, SimpleSpiceUpdate) updates; QEMUCursor *cursor; int mouse_x, mouse_y; }; @@ -102,6 +103,7 @@ struct SimpleSpiceUpdate { QXLImage image; QXLCommandExt ext; uint8_t *bitmap; + QTAILQ_ENTRY(SimpleSpiceUpdate) next; }; int qemu_spice_rect_is_empty(const QXLRect* r); |