79 files changed, 5086 insertions, 1686 deletions
diff --git a/Kconfig.host b/Kconfig.host index d763d89269..2ee71578f3 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -46,3 +46,6 @@ config FUZZ config VFIO_USER_SERVER_ALLOWED bool imply VFIO_USER_SERVER + +config HV_BALLOON_POSSIBLE + bool diff --git a/MAINTAINERS b/MAINTAINERS index 8e8a7d5be5..d4a480ce5a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2656,6 +2656,14 @@ F: hw/usb/canokey.c F: hw/usb/canokey.h F: docs/system/devices/canokey.rst +Hyper-V Dynamic Memory Protocol +M: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> +S: Supported +F: hw/hyperv/hv-balloon*.c +F: hw/hyperv/hv-balloon*.h +F: include/hw/hyperv/dynmem-proto.h +F: include/hw/hyperv/hv-balloon.h + Subsystems ---------- Overall Audio backends diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 240eb16d90..5adf4f12f7 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -594,6 +594,77 @@ path. Return path - opened by main thread, written by main thread AND postcopy thread (protected by rp_mutex) +Dirty limit +===================== +The dirty limit, short for dirty page rate upper limit, is a new capability +introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM +dirty ring to throttle down the guest during live migration. + +The algorithm framework is as follows: + +:: + + ------------------------------------------------------------------------------ + main --------------> throttle thread ------------> PREPARE(1) <-------- + thread \ | | + \ | | + \ V | + -\ CALCULATE(2) | + \ | | + \ | | + \ V | + \ SET PENALTY(3) ----- + -\ | + \ | + \ V + -> virtual CPU thread -------> ACCEPT PENALTY(4) + ------------------------------------------------------------------------------ + +When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, +the QEMU main thread starts the throttle thread. The throttle thread, once +launched, executes the loop, which consists of three steps: + + - PREPARE (1) + + The entire work of PREPARE (1) is preparation for the second stage, + CALCULATE(2), as the name implies. It involves preparing the dirty + page rate value and the corresponding upper limit of the VM: + The dirty page rate is calculated via the KVM dirty ring mechanism, + which tells QEMU how many dirty pages a virtual CPU has had since the + last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper + limit is specified by caller, therefore fetch it directly. + + - CALCULATE (2) + + Calculate a suitable sleep period for each virtual CPU, which will be + used to determine the penalty for the target virtual CPU. The + computation must be done carefully in order to reduce the dirty page + rate progressively down to the upper limit without oscillation. To + achieve this, two strategies are provided: the first is to add or + subtract sleep time based on the ratio of the current dirty page rate + to the limit, which is used when the current dirty page rate is far + from the limit; the second is to add or subtract a fixed time when + the current dirty page rate is close to the limit. + + - SET PENALTY (3) + + Set the sleep time for each virtual CPU that should be penalized based + on the results of the calculation supplied by step CALCULATE (2). + +After completing the three above stages, the throttle thread loops back +to step PREPARE (1) until the dirty limit is reached. + +On the other hand, each virtual CPU thread reads the sleep duration and +sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that +is ACCEPT PENALTY (4). 
Virtual CPUs tied with writing processes will +obviously exit to the path and get penalized, whereas virtual CPUs involved +with read processes will not. + +In summary, thanks to the KVM dirty ring technology, the dirty limit +algorithm will restrict virtual CPUs as needed to keep their dirty page +rate inside the limit. This leads to more steady reading performance during +live migration and can aid in improving large guest responsiveness. + Postcopy ======== diff --git a/dump/dump-hmp-cmds.c b/dump/dump-hmp-cmds.c index b038785fee..b428ec33df 100644 --- a/dump/dump-hmp-cmds.c +++ b/dump/dump-hmp-cmds.c @@ -19,6 +19,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict) bool paging = qdict_get_try_bool(qdict, "paging", false); bool zlib = qdict_get_try_bool(qdict, "zlib", false); bool lzo = qdict_get_try_bool(qdict, "lzo", false); + bool raw = qdict_get_try_bool(qdict, "raw", false); bool snappy = qdict_get_try_bool(qdict, "snappy", false); const char *file = qdict_get_str(qdict, "filename"); bool has_begin = qdict_haskey(qdict, "begin"); @@ -40,16 +41,28 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict) dump_format = DUMP_GUEST_MEMORY_FORMAT_WIN_DMP; } - if (zlib) { - dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB; + if (zlib && raw) { + if (raw) { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_ZLIB; + } else { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB; + } } if (lzo) { - dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO; + if (raw) { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_LZO; + } else { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO; + } } if (snappy) { - dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY; + if (raw) { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_SNAPPY; + } else { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY; + } } if (has_begin) { diff --git a/dump/dump.c b/dump/dump.c index d355ada62e..1c304cadfd 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -100,7 +100,7 @@ static int dump_cleanup(DumpState *s) memory_mapping_list_free(&s->list); close(s->fd); g_free(s->guest_note); - g_array_unref(s->string_table_buf); + g_clear_pointer(&s->string_table_buf, g_array_unref); s->guest_note = NULL; if (s->resume) { if (s->detached) { @@ -809,11 +809,15 @@ static void create_vmcore(DumpState *s, Error **errp) dump_end(s, errp); } -static int write_start_flat_header(int fd) +static int write_start_flat_header(DumpState *s) { MakedumpfileHeader *mh; int ret = 0; + if (s->kdump_raw) { + return 0; + } + QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER); mh = g_malloc0(MAX_SIZE_MDF_HEADER); @@ -824,7 +828,7 @@ static int write_start_flat_header(int fd) mh->version = cpu_to_be64(VERSION_FLAT_HEADER); size_t written_size; - written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER); + written_size = qemu_write_full(s->fd, mh, MAX_SIZE_MDF_HEADER); if (written_size != MAX_SIZE_MDF_HEADER) { ret = -1; } @@ -833,15 +837,19 @@ static int write_start_flat_header(int fd) return ret; } -static int write_end_flat_header(int fd) +static int write_end_flat_header(DumpState *s) { MakedumpfileDataHeader mdh; + if (s->kdump_raw) { + return 0; + } + mdh.offset = END_FLAG_FLAT_HEADER; mdh.buf_size = END_FLAG_FLAT_HEADER; size_t written_size; - written_size = qemu_write_full(fd, &mdh, sizeof(mdh)); + written_size = qemu_write_full(s->fd, &mdh, sizeof(mdh)); if (written_size != sizeof(mdh)) { return -1; } @@ -849,20 +857,28 @@ static int write_end_flat_header(int fd) return 0; } -static int write_buffer(int fd, off_t offset, const 
void *buf, size_t size) +static int write_buffer(DumpState *s, off_t offset, const void *buf, size_t size) { size_t written_size; MakedumpfileDataHeader mdh; + off_t seek_loc; - mdh.offset = cpu_to_be64(offset); - mdh.buf_size = cpu_to_be64(size); + if (s->kdump_raw) { + seek_loc = lseek(s->fd, offset, SEEK_SET); + if (seek_loc == (off_t) -1) { + return -1; + } + } else { + mdh.offset = cpu_to_be64(offset); + mdh.buf_size = cpu_to_be64(size); - written_size = qemu_write_full(fd, &mdh, sizeof(mdh)); - if (written_size != sizeof(mdh)) { - return -1; + written_size = qemu_write_full(s->fd, &mdh, sizeof(mdh)); + if (written_size != sizeof(mdh)) { + return -1; + } } - written_size = qemu_write_full(fd, buf, size); + written_size = qemu_write_full(s->fd, buf, size); if (written_size != size) { return -1; } @@ -982,7 +998,7 @@ static void create_header32(DumpState *s, Error **errp) #endif dh->status = cpu_to_dump32(s, status); - if (write_buffer(s->fd, 0, dh, size) < 0) { + if (write_buffer(s, 0, dh, size) < 0) { error_setg(errp, "dump: failed to write disk dump header"); goto out; } @@ -1012,7 +1028,7 @@ static void create_header32(DumpState *s, Error **errp) kh->offset_note = cpu_to_dump64(s, offset_note); kh->note_size = cpu_to_dump32(s, s->note_size); - if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS * + if (write_buffer(s, DISKDUMP_HEADER_BLOCKS * block_size, kh, size) < 0) { error_setg(errp, "dump: failed to write kdump sub header"); goto out; @@ -1027,7 +1043,7 @@ static void create_header32(DumpState *s, Error **errp) if (*errp) { goto out; } - if (write_buffer(s->fd, offset_note, s->note_buf, + if (write_buffer(s, offset_note, s->note_buf, s->note_size) < 0) { error_setg(errp, "dump: failed to write notes"); goto out; @@ -1093,7 +1109,7 @@ static void create_header64(DumpState *s, Error **errp) #endif dh->status = cpu_to_dump32(s, status); - if (write_buffer(s->fd, 0, dh, size) < 0) { + if (write_buffer(s, 0, dh, size) < 0) { error_setg(errp, "dump: failed to write disk dump header"); goto out; } @@ -1123,7 +1139,7 @@ static void create_header64(DumpState *s, Error **errp) kh->offset_note = cpu_to_dump64(s, offset_note); kh->note_size = cpu_to_dump64(s, s->note_size); - if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS * + if (write_buffer(s, DISKDUMP_HEADER_BLOCKS * block_size, kh, size) < 0) { error_setg(errp, "dump: failed to write kdump sub header"); goto out; @@ -1139,7 +1155,7 @@ static void create_header64(DumpState *s, Error **errp) goto out; } - if (write_buffer(s->fd, offset_note, s->note_buf, + if (write_buffer(s, offset_note, s->note_buf, s->note_size) < 0) { error_setg(errp, "dump: failed to write notes"); goto out; @@ -1204,7 +1220,7 @@ static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value, while (old_offset < new_offset) { /* calculate the offset and write dump_bitmap */ offset_bitmap1 = s->offset_dump_bitmap + old_offset; - if (write_buffer(s->fd, offset_bitmap1, buf, + if (write_buffer(s, offset_bitmap1, buf, bitmap_bufsize) < 0) { return -1; } @@ -1212,7 +1228,7 @@ static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value, /* dump level 1 is chosen, so 1st and 2nd bitmap are same */ offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap + old_offset; - if (write_buffer(s->fd, offset_bitmap2, buf, + if (write_buffer(s, offset_bitmap2, buf, bitmap_bufsize) < 0) { return -1; } @@ -1380,7 +1396,7 @@ out: static void prepare_data_cache(DataCache *data_cache, DumpState *s, off_t offset) { - data_cache->fd = s->fd; + data_cache->state = s; 
data_cache->data_size = 0; data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s); data_cache->buf = g_malloc0(data_cache->buf_size); @@ -1399,11 +1415,11 @@ static int write_cache(DataCache *dc, const void *buf, size_t size, /* * if flag_sync is set, synchronize data in dc->buf into vmcore. * otherwise check if the space is enough for caching data in buf, if not, - * write the data in dc->buf to dc->fd and reset dc->buf + * write the data in dc->buf to dc->state->fd and reset dc->buf */ if ((!flag_sync && dc->data_size + size > dc->buf_size) || (flag_sync && dc->data_size > 0)) { - if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) { + if (write_buffer(dc->state, dc->offset, dc->buf, dc->data_size) < 0) { return -1; } @@ -1644,7 +1660,7 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) * +------------------------------------------+ */ - ret = write_start_flat_header(s->fd); + ret = write_start_flat_header(s); if (ret < 0) { error_setg(errp, "dump: failed to write start flat header"); return; @@ -1665,33 +1681,13 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) return; } - ret = write_end_flat_header(s->fd); + ret = write_end_flat_header(s); if (ret < 0) { error_setg(errp, "dump: failed to write end flat header"); return; } } -static int validate_start_block(DumpState *s) -{ - GuestPhysBlock *block; - - if (!dump_has_filter(s)) { - return 0; - } - - QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { - /* This block is out of the range */ - if (block->target_start >= s->filter_area_begin + s->filter_area_length || - block->target_end <= s->filter_area_begin) { - continue; - } - return 0; - } - - return -1; -} - static void get_max_mapnr(DumpState *s) { GuestPhysBlock *last_block; @@ -1775,7 +1771,8 @@ static void vmcoreinfo_update_phys_base(DumpState *s) static void dump_init(DumpState *s, int fd, bool has_format, DumpGuestMemoryFormat format, bool paging, bool has_filter, - int64_t begin, int64_t length, Error **errp) + int64_t begin, int64_t length, bool kdump_raw, + Error **errp) { ERRP_GUARD(); VMCoreInfoState *vmci = vmcoreinfo_find(); @@ -1786,6 +1783,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, s->has_format = has_format; s->format = format; s->written_size = 0; + s->kdump_raw = kdump_raw; /* kdump-compressed is conflict with paging and filter */ if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { @@ -1810,7 +1808,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, s->fd = fd; if (has_filter && !length) { - error_setg(errp, QERR_INVALID_PARAMETER, "length"); + error_setg(errp, "parameter 'length' expects a non-zero size"); goto cleanup; } s->filter_area_begin = begin; @@ -1839,12 +1837,6 @@ static void dump_init(DumpState *s, int fd, bool has_format, goto cleanup; } - /* Is the filter filtering everything? */ - if (validate_start_block(s) == -1) { - error_setg(errp, QERR_INVALID_PARAMETER, "begin"); - goto cleanup; - } - /* get dump info: endian, class and architecture. * If the target architecture is not supported, cpu_get_dump_info() will * return -1. 
@@ -2061,17 +2053,19 @@ DumpQueryResult *qmp_query_dump(Error **errp) return result; } -void qmp_dump_guest_memory(bool paging, const char *file, +void qmp_dump_guest_memory(bool paging, const char *protocol, bool has_detach, bool detach, - bool has_begin, int64_t begin, bool has_length, - int64_t length, bool has_format, - DumpGuestMemoryFormat format, Error **errp) + bool has_begin, int64_t begin, + bool has_length, int64_t length, + bool has_format, DumpGuestMemoryFormat format, + Error **errp) { ERRP_GUARD(); const char *p; - int fd = -1; + int fd; DumpState *s; bool detach_p = false; + bool kdump_raw = false; if (runstate_check(RUN_STATE_INMIGRATE)) { error_setg(errp, "Dump not allowed during incoming migration."); @@ -2086,6 +2080,29 @@ void qmp_dump_guest_memory(bool paging, const char *file, } /* + * externally, we represent kdump-raw-* as separate formats, but internally + * they are handled the same, except for the "raw" flag + */ + if (has_format) { + switch (format) { + case DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_ZLIB: + format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB; + kdump_raw = true; + break; + case DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_LZO: + format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO; + kdump_raw = true; + break; + case DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_SNAPPY: + format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY; + kdump_raw = true; + break; + default: + break; + } + } + + /* * kdump-compressed format need the whole memory dumped, so paging or * filter is not supported here. */ @@ -2127,25 +2144,23 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } -#if !defined(WIN32) - if (strstart(file, "fd:", &p)) { + if (strstart(protocol, "fd:", &p)) { fd = monitor_get_fd(monitor_cur(), p, errp); if (fd == -1) { return; } - } -#endif - - if (strstart(file, "file:", &p)) { - fd = qemu_open_old(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR); + } else if (strstart(protocol, "file:", &p)) { + fd = qemu_create(p, O_WRONLY | O_TRUNC | O_BINARY, S_IRUSR, errp); if (fd < 0) { - error_setg_file_open(errp, errno, p); return; } + } else { + error_setg(errp, + "parameter 'protocol' must start with 'file:' or 'fd:'"); + return; } - - if (fd == -1) { - error_setg(errp, QERR_INVALID_PARAMETER, "protocol"); + if (kdump_raw && lseek(fd, 0, SEEK_CUR) == (off_t) -1) { + error_setg(errp, "kdump-raw formats require a seekable file"); return; } @@ -2168,7 +2183,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, dump_state_prepare(s); dump_init(s, fd, has_format, format, paging, has_begin, - begin, length, errp); + begin, length, kdump_raw, errp); if (*errp) { qatomic_set(&s->status, DUMP_STATUS_FAILED); return; @@ -2196,15 +2211,18 @@ DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) /* kdump-zlib is always available */ QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB); + QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_ZLIB); /* add new item if kdump-lzo is available */ #ifdef CONFIG_LZO QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO); + QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_LZO); #endif /* add new item if kdump-snappy is available */ #ifdef CONFIG_SNAPPY QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY); + QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_SNAPPY); #endif if (win_dump_available(NULL)) { diff --git a/hmp-commands.hx b/hmp-commands.hx index 63eac22734..c0a27688b6 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1085,14 +1085,16 @@ ERST { .name = 
"dump-guest-memory", - .args_type = "paging:-p,detach:-d,windmp:-w,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:l?,length:l?", - .params = "[-p] [-d] [-z|-l|-s|-w] filename [begin length]", + .args_type = "paging:-p,detach:-d,windmp:-w,zlib:-z,lzo:-l,snappy:-s,raw:-R,filename:F,begin:l?,length:l?", + .params = "[-p] [-d] [-z|-l|-s|-w] [-R] filename [begin length]", .help = "dump guest memory into file 'filename'.\n\t\t\t" "-p: do paging to get guest's memory mapping.\n\t\t\t" "-d: return immediately (do not wait for completion).\n\t\t\t" "-z: dump in kdump-compressed format, with zlib compression.\n\t\t\t" "-l: dump in kdump-compressed format, with lzo compression.\n\t\t\t" "-s: dump in kdump-compressed format, with snappy compression.\n\t\t\t" + "-R: when using kdump (-z, -l, -s), use raw rather than makedumpfile-flattened\n\t\t\t" + " format\n\t\t\t" "-w: dump in Windows crashdump format (can be used instead of ELF-dump converting),\n\t\t\t" " for Windows x86 and x64 guests with vmcoreinfo driver only.\n\t\t\t" "begin: the starting physical address.\n\t\t\t" @@ -1115,6 +1117,9 @@ SRST dump in kdump-compressed format, with lzo compression. ``-s`` dump in kdump-compressed format, with snappy compression. + ``-R`` + when using kdump (-z, -l, -s), use raw rather than makedumpfile-flattened + format ``-w`` dump in Windows crashdump format (can be used instead of ELF-dump converting), for Windows x64 guests with vmcoreinfo driver only diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index a07cd7eb5d..bfa53960c3 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -115,9 +115,13 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) return; } - if (xen_device_frontend_scanf(xendev, "protocol", "%ms", - &str) != 1) { - protocol = BLKIF_PROTOCOL_NATIVE; + if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) { + /* x86 defaults to the 32-bit protocol even for 64-bit guests. */ + if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) { + protocol = BLKIF_PROTOCOL_X86_32; + } else { + protocol = BLKIF_PROTOCOL_NATIVE; + } } else { if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) { protocol = BLKIF_PROTOCOL_X86_32; diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index 9a4b59c6f2..a6ff6a4875 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -253,6 +253,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) MemoryDeviceInfo *value; PCDIMMDeviceInfo *di; SgxEPCDeviceInfo *se; + HvBalloonDeviceInfo *hi; for (info = info_list; info; info = info->next) { value = info->value; @@ -310,6 +311,20 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) monitor_printf(mon, " node: %" PRId64 "\n", se->node); monitor_printf(mon, " memdev: %s\n", se->memdev); break; + case MEMORY_DEVICE_INFO_KIND_HV_BALLOON: + hi = value->u.hv_balloon.data; + monitor_printf(mon, "Memory device [%s]: \"%s\"\n", + MemoryDeviceInfoKind_str(value->type), + hi->id ? 
hi->id : ""); + if (hi->has_memaddr) { + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", + hi->memaddr); + } + monitor_printf(mon, " max-size: %" PRIu64 "\n", hi->max_size); + if (hi->memdev) { + monitor_printf(mon, " memdev: %s\n", hi->memdev); + } + break; default: g_assert_not_reached(); } diff --git a/hw/display/ati.c b/hw/display/ati.c index 6e38e00502..9a87a5504a 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -319,11 +319,13 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) case DAC_CNTL: val = s->regs.dac_cntl; break; - case GPIO_VGA_DDC: - val = s->regs.gpio_vga_ddc; + case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3: + val = ati_reg_read_offs(s->regs.gpio_vga_ddc, + addr - GPIO_VGA_DDC, size); break; - case GPIO_DVI_DDC: - val = s->regs.gpio_dvi_ddc; + case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3: + val = ati_reg_read_offs(s->regs.gpio_dvi_ddc, + addr - GPIO_DVI_DDC, size); break; case GPIO_MONID ... GPIO_MONID + 3: val = ati_reg_read_offs(s->regs.gpio_monid, @@ -337,6 +339,9 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) case PALETTE_DATA: val = vga_ioport_read(&s->vga, VGA_PEL_D); break; + case PALETTE_30_DATA: + val = s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IR)]; + break; case CNFG_CNTL: val = s->regs.config_cntl; break; @@ -349,14 +354,17 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) PCI_BASE_ADDRESS_0, size) & 0xfffffff0; break; case CONFIG_APER_SIZE: - val = s->vga.vram_size; + val = s->vga.vram_size / 2; break; case CONFIG_REG_1_BASE: val = pci_default_read_config(&s->dev, PCI_BASE_ADDRESS_2, size) & 0xfffffff0; break; case CONFIG_REG_APER_SIZE: - val = memory_region_size(&s->mm); + val = memory_region_size(&s->mm) / 2; + break; + case HOST_PATH_CNTL: + val = BIT(23); /* Radeon HDP_APER_CNTL */ break; case MC_STATUS: val = 5; @@ -612,29 +620,34 @@ static void ati_mm_write(void *opaque, hwaddr addr, s->regs.dac_cntl = data & 0xffffe3ff; s->vga.dac_8bit = !!(data & DAC_8BIT_EN); break; - case GPIO_VGA_DDC: + /* + * GPIO regs for DDC access. Because some drivers access these via + * multiple byte writes we have to be careful when we send bits to + * avoid spurious changes in bitbang_i2c state. Only do it when either + * the enable bits are changed or output bits changed while enabled. + */ + case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3: if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) { /* FIXME: Maybe add a property to select VGA or DVI port? */ } break; - case GPIO_DVI_DDC: + case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3: if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) { - s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, data, 0); + ati_reg_write_offs(&s->regs.gpio_dvi_ddc, + addr - GPIO_DVI_DDC, data, size); + if ((addr <= GPIO_DVI_DDC + 2 && addr + size > GPIO_DVI_DDC + 2) || + (addr == GPIO_DVI_DDC && (s->regs.gpio_dvi_ddc & 0x30000))) { + s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, + s->regs.gpio_dvi_ddc, 0); + } } break; case GPIO_MONID ... GPIO_MONID + 3: /* FIXME What does Radeon have here? */ if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { + /* Rage128p accesses DDC via MONID(1-2) with additional mask bit */ ati_reg_write_offs(&s->regs.gpio_monid, addr - GPIO_MONID, data, size); - /* - * Rage128p accesses DDC used to get EDID via these bits. - * Because some drivers access this via multiple byte writes - * we have to be careful when we send bits to avoid spurious - * changes in bitbang_i2c state. 
So only do it when mask is set - * and either the enable bits are changed or output bits changed - * while enabled. - */ if ((s->regs.gpio_monid & BIT(25)) && ((addr <= GPIO_MONID + 2 && addr + size > GPIO_MONID + 2) || (addr == GPIO_MONID && (s->regs.gpio_monid & 0x60000)))) { @@ -663,6 +676,12 @@ static void ati_mm_write(void *opaque, hwaddr addr, data >>= 8; vga_ioport_write(&s->vga, VGA_PEL_D, data & 0xff); break; + case PALETTE_30_DATA: + s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IW)] = data; + vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 22) & 0xff); + vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 12) & 0xff); + vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 2) & 0xff); + break; case CNFG_CNTL: s->regs.config_cntl = data; break; @@ -1014,6 +1033,7 @@ static Property ati_vga_properties[] = { DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id, PCI_DEVICE_ID_ATI_RAGE128_PF), DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false), + DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3), DEFINE_PROP_END_OF_LIST() }; @@ -1035,11 +1055,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data) k->exit = ati_vga_exit; } +static void ati_vga_init(Object *o) +{ + object_property_set_description(o, "x-pixman", "Use pixman for: " + "1: fill, 2: blit"); +} + static const TypeInfo ati_vga_info = { .name = TYPE_ATI_VGA, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(ATIVGAState), .class_init = ati_vga_class_init, + .instance_init = ati_vga_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { }, diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c index 7d786653e8..0e6b8e4367 100644 --- a/hw/display/ati_2d.c +++ b/hw/display/ati_2d.c @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s) switch (s->regs.dp_mix & GMC_ROP3_MASK) { case ROP3_SRCCOPY: { + bool fallback = false; unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? 
@@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s) src_bits, dst_bits, src_stride, dst_stride, bpp, bpp, src_x, src_y, dst_x, dst_y, s->regs.dst_width, s->regs.dst_height); - if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT && + if ((s->use_pixman & BIT(1)) && + s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT && s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) { - pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits, - src_stride, dst_stride, bpp, bpp, - src_x, src_y, dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height); - } else { + fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits, + src_stride, dst_stride, bpp, bpp, + src_x, src_y, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height); + } else if (s->use_pixman & BIT(1)) { /* FIXME: We only really need a temporary if src and dst overlap */ int llb = s->regs.dst_width * (bpp / 8); int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * s->regs.dst_height); - pixman_blt((uint32_t *)src_bits, tmp, - src_stride, tmp_stride, bpp, bpp, - src_x, src_y, 0, 0, - s->regs.dst_width, s->regs.dst_height); - pixman_blt(tmp, (uint32_t *)dst_bits, - tmp_stride, dst_stride, bpp, bpp, - 0, 0, dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height); + fallback = !pixman_blt((uint32_t *)src_bits, tmp, + src_stride, tmp_stride, bpp, bpp, + src_x, src_y, 0, 0, + s->regs.dst_width, s->regs.dst_height); + if (!fallback) { + fallback = !pixman_blt(tmp, (uint32_t *)dst_bits, + tmp_stride, dst_stride, bpp, bpp, + 0, 0, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height); + } g_free(tmp); + } else { + fallback = true; + } + if (fallback) { + unsigned int y, i, j, bypp = bpp / 8; + unsigned int src_pitch = src_stride * sizeof(uint32_t); + unsigned int dst_pitch = dst_stride * sizeof(uint32_t); + + for (y = 0; y < s->regs.dst_height; y++) { + i = dst_x * bypp; + j = src_x * bypp; + if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) { + i += (dst_y + y) * dst_pitch; + j += (src_y + y) * src_pitch; + } else { + i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch; + j += (src_y + s->regs.dst_height - 1 - y) * src_pitch; + } + memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp); + } } if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr && dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr + @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s) dst_stride /= sizeof(uint32_t); DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n", - dst_bits, dst_stride, bpp, - dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height, - filler); - pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, - dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height, - filler); + dst_bits, dst_stride, bpp, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height, filler); + if (!(s->use_pixman & BIT(0)) || + !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height, filler)) { + /* fallback when pixman failed or we don't want to call it */ + unsigned int x, y, i, bypp = bpp / 8; + unsigned int dst_pitch = dst_stride * sizeof(uint32_t); + for (y = 0; y < s->regs.dst_height; y++) { + i = dst_x * bypp + (dst_y + y) * dst_pitch; + for (x = 0; x < s->regs.dst_width; x++, i += bypp) { + stn_he_p(&dst_bits[i], bypp, filler); + } + } + } if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr && dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr + s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) { diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c index bd0ecd48c7..3ffa7f35df 100644 
--- a/hw/display/ati_dbg.c +++ b/hw/display/ati_dbg.c @@ -30,6 +30,7 @@ static struct ati_regdesc ati_reg_names[] = { {"AMCGPIO_EN_MIR", 0x00a8}, {"PALETTE_INDEX", 0x00b0}, {"PALETTE_DATA", 0x00b4}, + {"PALETTE_30_DATA", 0x00b8}, {"CNFG_CNTL", 0x00e0}, {"GEN_RESET_CNTL", 0x00f0}, {"CNFG_MEMSIZE", 0x00f8}, @@ -38,6 +39,7 @@ static struct ati_regdesc ati_reg_names[] = { {"CONFIG_APER_SIZE", 0x0108}, {"CONFIG_REG_1_BASE", 0x010c}, {"CONFIG_REG_APER_SIZE", 0x0110}, + {"HOST_PATH_CNTL", 0x0130}, {"MEM_CNTL", 0x0140}, {"MC_FB_LOCATION", 0x0148}, {"MC_AGP_LOCATION", 0x014C}, diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h index e8d3c7af75..f5a47b82b0 100644 --- a/hw/display/ati_int.h +++ b/hw/display/ati_int.h @@ -44,6 +44,7 @@ typedef struct ATIVGARegs { uint32_t gpio_dvi_ddc; uint32_t gpio_monid; uint32_t config_cntl; + uint32_t palette[256]; uint32_t crtc_h_total_disp; uint32_t crtc_h_sync_strt_wid; uint32_t crtc_v_total_disp; @@ -89,6 +90,7 @@ struct ATIVGAState { char *model; uint16_t dev_id; uint8_t mode; + uint8_t use_pixman; bool cursor_guest_mode; uint16_t cursor_size; uint32_t cursor_offset; diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h index d6282b2ef2..d7127748ff 100644 --- a/hw/display/ati_regs.h +++ b/hw/display/ati_regs.h @@ -48,6 +48,7 @@ #define AMCGPIO_EN_MIR 0x00a8 #define PALETTE_INDEX 0x00b0 #define PALETTE_DATA 0x00b4 +#define PALETTE_30_DATA 0x00b8 #define CNFG_CNTL 0x00e0 #define GEN_RESET_CNTL 0x00f0 #define CNFG_MEMSIZE 0x00f8 @@ -56,6 +57,7 @@ #define CONFIG_APER_SIZE 0x0108 #define CONFIG_REG_1_BASE 0x010c #define CONFIG_REG_APER_SIZE 0x0110 +#define HOST_PATH_CNTL 0x0130 #define MEM_CNTL 0x0140 #define MC_FB_LOCATION 0x0148 #define MC_AGP_LOCATION 0x014C diff --git a/hw/display/macfb.c b/hw/display/macfb.c index 2f8e016566..d61541ccb5 100644 --- a/hw/display/macfb.c +++ b/hw/display/macfb.c @@ -36,8 +36,8 @@ #define DAFB_INTR_MASK 0x104 #define DAFB_INTR_STAT 0x108 #define DAFB_INTR_CLEAR 0x10c -#define DAFB_RESET 0x200 -#define DAFB_LUT 0x213 +#define DAFB_LUT_INDEX 0x200 +#define DAFB_LUT 0x210 #define DAFB_INTR_VBL 0x4 @@ -537,6 +537,11 @@ static uint64_t macfb_ctrl_read(void *opaque, case DAFB_MODE_SENSE: val = macfb_sense_read(s); break; + case DAFB_LUT ... DAFB_LUT + 3: + val = s->color_palette[s->palette_current]; + s->palette_current = (s->palette_current + 1) % + ARRAY_SIZE(s->color_palette); + break; default: if (addr < MACFB_CTRL_TOPADDR) { val = s->regs[addr >> 2]; @@ -583,13 +588,11 @@ static void macfb_ctrl_write(void *opaque, s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL; macfb_update_irq(s); break; - case DAFB_RESET: - s->palette_current = 0; - s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL; - macfb_update_irq(s); + case DAFB_LUT_INDEX: + s->palette_current = (val & 0xff) * 3; break; - case DAFB_LUT: - s->color_palette[s->palette_current] = val; + case DAFB_LUT ... 
DAFB_LUT + 3: + s->color_palette[s->palette_current] = val & 0xff; s->palette_current = (s->palette_current + 1) % ARRAY_SIZE(s->color_palette); if (s->palette_current % 3) { diff --git a/hw/display/virtio-gpu-pci-rutabaga.c b/hw/display/virtio-gpu-pci-rutabaga.c index c96729e198..abbb898c65 100644 --- a/hw/display/virtio-gpu-pci-rutabaga.c +++ b/hw/display/virtio-gpu-pci-rutabaga.c @@ -36,6 +36,7 @@ static const TypeInfo virtio_gpu_rutabaga_pci_info[] = { .instance_init = virtio_gpu_rutabaga_initfn, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, } }, }; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 4265316cbb..2707bceea8 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1213,6 +1213,9 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size, assert(QTAILQ_EMPTY(&g->cmdq)); QTAILQ_FOREACH(res, &g->reslist, next) { + if (res->blob_size) { + continue; + } qemu_put_be32(f, res->resource_id); qemu_put_be32(f, res->width); qemu_put_be32(f, res->height); @@ -1230,12 +1233,40 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size, return vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL); } +static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g, + struct virtio_gpu_simple_resource *res) +{ + int i; + + for (i = 0; i < res->iov_cnt; i++) { + hwaddr len = res->iov[i].iov_len; + res->iov[i].iov_base = + dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len, + DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED); + + if (!res->iov[i].iov_base || len != res->iov[i].iov_len) { + /* Clean up the half-a-mapping we just created... */ + if (res->iov[i].iov_base) { + dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as, res->iov[i].iov_base, + len, DMA_DIRECTION_TO_DEVICE, 0); + } + /* ...and the mappings for previous loop iterations */ + res->iov_cnt = i; + virtio_gpu_cleanup_mapping(g, res); + return false; + } + } + + QTAILQ_INSERT_HEAD(&g->reslist, res, next); + g->hostmem += res->hostmem; + return true; +} + static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, const VMStateField *field) { VirtIOGPU *g = opaque; struct virtio_gpu_simple_resource *res; - struct virtio_gpu_scanout *scanout; uint32_t resource_id, pformat; void *bits = NULL; int i; @@ -1294,40 +1325,96 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, qemu_get_buffer(f, (void *)pixman_image_get_data(res->image), pixman_image_get_stride(res->image) * res->height); - /* restore mapping */ - for (i = 0; i < res->iov_cnt; i++) { - hwaddr len = res->iov[i].iov_len; - res->iov[i].iov_base = - dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len, - DMA_DIRECTION_TO_DEVICE, - MEMTXATTRS_UNSPECIFIED); - - if (!res->iov[i].iov_base || len != res->iov[i].iov_len) { - /* Clean up the half-a-mapping we just created... 
*/ - if (res->iov[i].iov_base) { - dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as, - res->iov[i].iov_base, - len, - DMA_DIRECTION_TO_DEVICE, - 0); - } - /* ...and the mappings for previous loop iterations */ - res->iov_cnt = i; - virtio_gpu_cleanup_mapping(g, res); - pixman_image_unref(res->image); - g_free(res); - return -EINVAL; - } + if (!virtio_gpu_load_restore_mapping(g, res)) { + pixman_image_unref(res->image); + g_free(res); + return -EINVAL; } - QTAILQ_INSERT_HEAD(&g->reslist, res, next); - g->hostmem += res->hostmem; - resource_id = qemu_get_be32(f); } /* load & apply scanout state */ vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1); + + return 0; +} + +static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_simple_resource *res; + int i; + + /* in 2d mode we should never find unprocessed commands here */ + assert(QTAILQ_EMPTY(&g->cmdq)); + + QTAILQ_FOREACH(res, &g->reslist, next) { + if (!res->blob_size) { + continue; + } + qemu_put_be32(f, res->resource_id); + qemu_put_be32(f, res->blob_size); + qemu_put_be32(f, res->iov_cnt); + for (i = 0; i < res->iov_cnt; i++) { + qemu_put_be64(f, res->addrs[i]); + qemu_put_be32(f, res->iov[i].iov_len); + } + } + qemu_put_be32(f, 0); /* end of list */ + + return 0; +} + +static int virtio_gpu_blob_load(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_simple_resource *res; + uint32_t resource_id; + int i; + + resource_id = qemu_get_be32(f); + while (resource_id != 0) { + res = virtio_gpu_find_resource(g, resource_id); + if (res) { + return -EINVAL; + } + + res = g_new0(struct virtio_gpu_simple_resource, 1); + res->resource_id = resource_id; + res->blob_size = qemu_get_be32(f); + res->iov_cnt = qemu_get_be32(f); + res->addrs = g_new(uint64_t, res->iov_cnt); + res->iov = g_new(struct iovec, res->iov_cnt); + + /* read data */ + for (i = 0; i < res->iov_cnt; i++) { + res->addrs[i] = qemu_get_be64(f); + res->iov[i].iov_len = qemu_get_be32(f); + } + + if (!virtio_gpu_load_restore_mapping(g, res)) { + g_free(res); + return -EINVAL; + } + + virtio_gpu_init_udmabuf(res); + + resource_id = qemu_get_be32(f); + } + + return 0; +} + +static int virtio_gpu_post_load(void *opaque, int version_id) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_scanout *scanout; + struct virtio_gpu_simple_resource *res; + int i; + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { /* FIXME: should take scanout.r.{x,y} into account */ scanout = &g->parent_obj.scanout[i]; @@ -1475,6 +1562,32 @@ virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config) } } +static bool virtio_gpu_blob_state_needed(void *opaque) +{ + VirtIOGPU *g = VIRTIO_GPU(opaque); + + return virtio_gpu_blob_enabled(g->parent_obj.conf); +} + +const VMStateDescription vmstate_virtio_gpu_blob_state = { + .name = "virtio-gpu/blob", + .minimum_version_id = VIRTIO_GPU_VM_VERSION, + .version_id = VIRTIO_GPU_VM_VERSION, + .needed = virtio_gpu_blob_state_needed, + .fields = (const VMStateField[]){ + { + .name = "virtio-gpu/blob", + .info = &(const VMStateInfo) { + .name = "blob", + .get = virtio_gpu_blob_load, + .put = virtio_gpu_blob_save, + }, + .flags = VMS_SINGLE, + } /* device */, + VMSTATE_END_OF_LIST() + }, +}; + /* * For historical reasons virtio_gpu does not adhere to virtio migration * scheme as described in doc/virtio-migration.txt, in a sense that no @@ -1500,6 +1613,11 @@ static const VMStateDescription 
vmstate_virtio_gpu = { } /* device */, VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription * []) { + &vmstate_virtio_gpu_blob_state, + NULL + }, + .post_load = virtio_gpu_post_load, }; static Property virtio_gpu_properties[] = { diff --git a/hw/hyperv/Kconfig b/hw/hyperv/Kconfig index fcf65903bd..41dd827c84 100644 --- a/hw/hyperv/Kconfig +++ b/hw/hyperv/Kconfig @@ -16,3 +16,13 @@ config SYNDBG bool default y depends on VMBUS + +config HV_BALLOON_SUPPORTED + bool + +config HV_BALLOON + bool + default y + depends on VMBUS + depends on HV_BALLOON_POSSIBLE + depends on HV_BALLOON_SUPPORTED diff --git a/hw/hyperv/hv-balloon-internal.h b/hw/hyperv/hv-balloon-internal.h new file mode 100644 index 0000000000..164c2e5825 --- /dev/null +++ b/hw/hyperv/hv-balloon-internal.h @@ -0,0 +1,33 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HV_BALLOON_INTERNAL_H +#define HW_HYPERV_HV_BALLOON_INTERNAL_H + +#include "qemu/osdep.h" + +#define HV_BALLOON_PFN_SHIFT 12 +#define HV_BALLOON_PAGE_SIZE (1 << HV_BALLOON_PFN_SHIFT) + +#define SUM_OVERFLOW_U64(in1, in2) ((in1) > UINT64_MAX - (in2)) +#define SUM_SATURATE_U64(in1, in2) \ + ({ \ + uint64_t _in1 = (in1), _in2 = (in2); \ + uint64_t _result; \ + \ + if (!SUM_OVERFLOW_U64(_in1, _in2)) { \ + _result = _in1 + _in2; \ + } else { \ + _result = UINT64_MAX; \ + } \ + \ + _result; \ + }) + +#endif diff --git a/hw/hyperv/hv-balloon-our_range_memslots.c b/hw/hyperv/hv-balloon-our_range_memslots.c new file mode 100644 index 0000000000..99bae870f3 --- /dev/null +++ b/hw/hyperv/hv-balloon-our_range_memslots.c @@ -0,0 +1,201 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "hv-balloon-internal.h" +#include "hv-balloon-our_range_memslots.h" +#include "trace.h" + +/* OurRange */ +static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count) +{ + assert(count <= UINT64_MAX - start); + our_range->range.start = start; + our_range->range.count = count; + + hvb_page_range_tree_init(&our_range->removed_guest); + hvb_page_range_tree_init(&our_range->removed_both); + + /* mark the whole range as unused but for potential use */ + our_range->added = 0; + our_range->unusable_tail = 0; +} + +static void our_range_destroy(OurRange *our_range) +{ + hvb_page_range_tree_destroy(&our_range->removed_guest); + hvb_page_range_tree_destroy(&our_range->removed_both); +} + +void hvb_our_range_clear_removed_trees(OurRange *our_range) +{ + hvb_page_range_tree_destroy(&our_range->removed_guest); + hvb_page_range_tree_destroy(&our_range->removed_both); + hvb_page_range_tree_init(&our_range->removed_guest); + hvb_page_range_tree_init(&our_range->removed_both); +} + +void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size) +{ + assert(additional_size <= UINT64_MAX - our_range->added); + + our_range->added += additional_size; + + assert(our_range->added <= UINT64_MAX - our_range->unusable_tail); + assert(our_range->added + our_range->unusable_tail <= + our_range->range.count); +} + +/* OurRangeMemslots */ +static void our_range_memslots_init_slots(OurRangeMemslots *our_range, + MemoryRegion *backing_mr, + Object *memslot_owner) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + unsigned int idx; + uint64_t memslot_offset; + + assert(memslots->count > 0); + memslots->slots = g_new0(MemoryRegion, memslots->count); + + /* Initialize our memslots, but don't map them yet. */ + assert(memslots->size_each > 0); + for (idx = 0, memslot_offset = 0; idx < memslots->count; + idx++, memslot_offset += memslots->size_each) { + uint64_t memslot_size; + g_autofree char *name = NULL; + + /* The size of the last memslot might be smaller. */ + if (idx == memslots->count - 1) { + uint64_t region_size; + + assert(our_range->mr); + region_size = memory_region_size(our_range->mr); + memslot_size = region_size - memslot_offset; + } else { + memslot_size = memslots->size_each; + } + + name = g_strdup_printf("memslot-%u", idx); + memory_region_init_alias(&memslots->slots[idx], memslot_owner, name, + backing_mr, memslot_offset, memslot_size); + /* + * We want to be able to atomically and efficiently activate/deactivate + * individual memslots without affecting adjacent memslots in memory + * notifiers. 
+ */ + memory_region_set_unmergeable(&memslots->slots[idx], true); + } + + memslots->mapped_count = 0; +} + +OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr, + MemoryRegion *parent_mr, + MemoryRegion *backing_mr, + Object *memslot_owner, + unsigned int memslot_count, + uint64_t memslot_size) +{ + OurRangeMemslots *our_range; + + our_range = g_malloc(sizeof(*our_range)); + our_range_init(&our_range->range, + addr / HV_BALLOON_PAGE_SIZE, + memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE); + our_range->slots.size_each = memslot_size; + our_range->slots.count = memslot_count; + our_range->mr = parent_mr; + our_range_memslots_init_slots(our_range, backing_mr, memslot_owner); + + return our_range; +} + +static void our_range_memslots_free_memslots(OurRangeMemslots *our_range) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + unsigned int idx; + uint64_t offset; + + memory_region_transaction_begin(); + for (idx = 0, offset = 0; idx < memslots->mapped_count; + idx++, offset += memslots->size_each) { + trace_hv_balloon_unmap_slot(idx, memslots->count, offset); + assert(memory_region_is_mapped(&memslots->slots[idx])); + memory_region_del_subregion(our_range->mr, &memslots->slots[idx]); + } + memory_region_transaction_commit(); + + for (idx = 0; idx < memslots->count; idx++) { + object_unparent(OBJECT(&memslots->slots[idx])); + } + + g_clear_pointer(&our_range->slots.slots, g_free); +} + +void hvb_our_range_memslots_free(OurRangeMemslots *our_range) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + MemoryRegion *hostmem_mr; + RAMBlock *rb; + + assert(our_range->slots.count > 0); + assert(our_range->slots.slots); + + hostmem_mr = memslots->slots[0].alias; + rb = hostmem_mr->ram_block; + ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); + + our_range_memslots_free_memslots(our_range); + our_range_destroy(&our_range->range); + g_free(our_range); +} + +void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range, + uint64_t additional_map_size) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + uint64_t total_map_size; + unsigned int idx; + uint64_t offset; + + total_map_size = (our_range->range.added + additional_map_size) * + HV_BALLOON_PAGE_SIZE; + idx = memslots->mapped_count; + assert(memslots->size_each > 0); + offset = idx * memslots->size_each; + + /* + * Activate all memslots covered by the newly added region in a single + * transaction. + */ + memory_region_transaction_begin(); + for ( ; idx < memslots->count; + idx++, offset += memslots->size_each) { + /* + * If this memslot starts beyond or at the end of the range to map so + * does every next one. + */ + if (offset >= total_map_size) { + break; + } + + /* + * Instead of enabling/disabling memslot, we add/remove them. This + * should make address space updates faster, because we don't have to + * loop over many disabled subregions. + */ + trace_hv_balloon_map_slot(idx, memslots->count, offset); + assert(!memory_region_is_mapped(&memslots->slots[idx])); + memory_region_add_subregion(our_range->mr, offset, + &memslots->slots[idx]); + + memslots->mapped_count++; + } + memory_region_transaction_commit(); +} diff --git a/hw/hyperv/hv-balloon-our_range_memslots.h b/hw/hyperv/hv-balloon-our_range_memslots.h new file mode 100644 index 0000000000..b6f592d34b --- /dev/null +++ b/hw/hyperv/hv-balloon-our_range_memslots.h @@ -0,0 +1,110 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. 
+ * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H +#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H + +#include "qemu/osdep.h" + +#include "exec/memory.h" +#include "qom/object.h" +#include "hv-balloon-page_range_tree.h" + +/* OurRange */ +#define OUR_RANGE(ptr) ((OurRange *)(ptr)) + +/* "our range" means the memory range owned by this driver (for hot-adding) */ +typedef struct OurRange { + PageRange range; + + /* How many pages were hot-added to the guest */ + uint64_t added; + + /* Pages at the end not currently usable */ + uint64_t unusable_tail; + + /* Memory removed from the guest */ + PageRangeTree removed_guest, removed_both; +} OurRange; + +static inline uint64_t our_range_get_remaining_start(OurRange *our_range) +{ + return our_range->range.start + our_range->added; +} + +static inline uint64_t our_range_get_remaining_size(OurRange *our_range) +{ + return our_range->range.count - our_range->added - our_range->unusable_tail; +} + +void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size); + +static inline void our_range_mark_remaining_unusable(OurRange *our_range) +{ + our_range->unusable_tail = our_range->range.count - our_range->added; +} + +static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range, + bool both) +{ + if (both) { + return our_range->removed_both; + } else { + return our_range->removed_guest; + } +} + +static inline bool our_range_is_removed_tree_empty(OurRange *our_range, + bool both) +{ + if (both) { + return page_range_tree_is_empty(our_range->removed_both); + } else { + return page_range_tree_is_empty(our_range->removed_guest); + } +} + +void hvb_our_range_clear_removed_trees(OurRange *our_range); + +/* OurRangeMemslots */ +typedef struct OurRangeMemslotsSlots { + /* Nominal size of each memslot (the last one might be smaller) */ + uint64_t size_each; + + /* Slots array and its element count */ + MemoryRegion *slots; + unsigned int count; + + /* How many slots are currently mapped */ + unsigned int mapped_count; +} OurRangeMemslotsSlots; + +typedef struct OurRangeMemslots { + OurRange range; + + /* Memslots covering our range */ + OurRangeMemslotsSlots slots; + + MemoryRegion *mr; +} OurRangeMemslots; + +OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr, + MemoryRegion *parent_mr, + MemoryRegion *backing_mr, + Object *memslot_owner, + unsigned int memslot_count, + uint64_t memslot_size); +void hvb_our_range_memslots_free(OurRangeMemslots *our_range); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free) + +void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range, + uint64_t additional_map_size); + +#endif diff --git a/hw/hyperv/hv-balloon-page_range_tree.c b/hw/hyperv/hv-balloon-page_range_tree.c new file mode 100644 index 0000000000..e178d8b413 --- /dev/null +++ b/hw/hyperv/hv-balloon-page_range_tree.c @@ -0,0 +1,228 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "hv-balloon-internal.h" +#include "hv-balloon-page_range_tree.h" + +/* + * temporarily avoid warnings about enhanced GTree API usage requiring a + * too recent Glib version until GLIB_VERSION_MAX_ALLOWED finally reaches + * the Glib version with this API + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +/* PageRangeTree */ +static gint page_range_tree_key_compare(gconstpointer leftp, + gconstpointer rightp, + gpointer user_data) +{ + const uint64_t *left = leftp, *right = rightp; + + if (*left < *right) { + return -1; + } else if (*left > *right) { + return 1; + } else { /* *left == *right */ + return 0; + } +} + +static GTreeNode *page_range_tree_insert_new(PageRangeTree tree, + uint64_t start, uint64_t count) +{ + uint64_t *key = g_malloc(sizeof(*key)); + PageRange *range = g_malloc(sizeof(*range)); + + assert(count > 0); + + *key = range->start = start; + range->count = count; + + return g_tree_insert_node(tree.t, key, range); +} + +void hvb_page_range_tree_insert(PageRangeTree tree, + uint64_t start, uint64_t count, + uint64_t *dupcount) +{ + GTreeNode *node; + bool joinable; + uint64_t intersection; + PageRange *range; + + assert(!SUM_OVERFLOW_U64(start, count)); + if (count == 0) { + return; + } + + node = g_tree_upper_bound(tree.t, &start); + if (node) { + node = g_tree_node_previous(node); + } else { + node = g_tree_node_last(tree.t); + } + + if (node) { + range = g_tree_node_value(node); + assert(range); + intersection = page_range_intersection_size(range, start, count); + joinable = page_range_joinable_right(range, start, count); + } + + if (!node || + (!intersection && !joinable)) { + /* + * !node case: the tree is empty or the very first node in the tree + * already has a higher key (the start of its range). + * the other case: there is a gap in the tree between the new range + * and the previous one. + * anyway, let's just insert the new range into the tree. + */ + node = page_range_tree_insert_new(tree, start, count); + assert(node); + range = g_tree_node_value(node); + assert(range); + } else { + /* + * the previous range in the tree either partially covers the new + * range or ends just at its beginning - extend it + */ + if (dupcount) { + *dupcount += intersection; + } + + count += start - range->start; + range->count = MAX(range->count, count); + } + + /* check next nodes for possible merging */ + for (node = g_tree_node_next(node); node; ) { + PageRange *rangecur; + + rangecur = g_tree_node_value(node); + assert(rangecur); + + intersection = page_range_intersection_size(rangecur, + range->start, range->count); + joinable = page_range_joinable_left(rangecur, + range->start, range->count); + if (!intersection && !joinable) { + /* the current node is disjoint */ + break; + } + + if (dupcount) { + *dupcount += intersection; + } + + count = rangecur->count + (rangecur->start - range->start); + range->count = MAX(range->count, count); + + /* the current node was merged in, remove it */ + start = rangecur->start; + node = g_tree_node_next(node); + /* no hinted removal in GTree... 
*/ + g_tree_remove(tree.t, &start); + } +} + +bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out, + uint64_t maxcount) +{ + GTreeNode *node; + PageRange *range; + + node = g_tree_node_last(tree.t); + if (!node) { + return false; + } + + range = g_tree_node_value(node); + assert(range); + + out->start = range->start; + + /* can't modify range->start as it is the node key */ + if (range->count > maxcount) { + out->start += range->count - maxcount; + out->count = maxcount; + range->count -= maxcount; + } else { + out->count = range->count; + /* no hinted removal in GTree... */ + g_tree_remove(tree.t, &out->start); + } + + return true; +} + +bool hvb_page_range_tree_intree_any(PageRangeTree tree, + uint64_t start, uint64_t count) +{ + GTreeNode *node; + + if (count == 0) { + return false; + } + + /* find the first node that can possibly intersect our range */ + node = g_tree_upper_bound(tree.t, &start); + if (node) { + /* + * a NULL node below means that the very first node in the tree + * already has a higher key (the start of its range). + */ + node = g_tree_node_previous(node); + } else { + /* a NULL node below means that the tree is empty */ + node = g_tree_node_last(tree.t); + } + /* node range start <= range start */ + + if (!node) { + /* node range start > range start */ + node = g_tree_node_first(tree.t); + } + + for ( ; node; node = g_tree_node_next(node)) { + PageRange *range = g_tree_node_value(node); + + assert(range); + /* + * if this node starts beyond or at the end of our range so does + * every next one + */ + if (range->start >= start + count) { + break; + } + + if (page_range_intersection_size(range, start, count) > 0) { + return true; + } + } + + return false; +} + +void hvb_page_range_tree_init(PageRangeTree *tree) +{ + tree->t = g_tree_new_full(page_range_tree_key_compare, NULL, + g_free, g_free); +} + +void hvb_page_range_tree_destroy(PageRangeTree *tree) +{ + /* g_tree_destroy() is not NULL-safe */ + if (!tree->t) { + return; + } + + g_tree_destroy(tree->t); + tree->t = NULL; +} diff --git a/hw/hyperv/hv-balloon-page_range_tree.h b/hw/hyperv/hv-balloon-page_range_tree.h new file mode 100644 index 0000000000..07a9ae0da6 --- /dev/null +++ b/hw/hyperv/hv-balloon-page_range_tree.h @@ -0,0 +1,118 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H +#define HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H + +#include "qemu/osdep.h" + +/* PageRange */ +typedef struct PageRange { + uint64_t start; + uint64_t count; +} PageRange; + +/* return just the part of range before (start) */ +static inline void page_range_part_before(const PageRange *range, + uint64_t start, PageRange *out) +{ + uint64_t endr = range->start + range->count; + uint64_t end = MIN(endr, start); + + out->start = range->start; + if (end > out->start) { + out->count = end - out->start; + } else { + out->count = 0; + } +} + +/* return just the part of range after (start, count) */ +static inline void page_range_part_after(const PageRange *range, + uint64_t start, uint64_t count, + PageRange *out) +{ + uint64_t end = range->start + range->count; + uint64_t ends = start + count; + + out->start = MAX(range->start, ends); + if (end > out->start) { + out->count = end - out->start; + } else { + out->count = 0; + } +} + +static inline void page_range_intersect(const PageRange *range, + uint64_t start, uint64_t count, + PageRange *out) +{ + uint64_t end1 = range->start + range->count; + uint64_t end2 = start + count; + uint64_t end = MIN(end1, end2); + + out->start = MAX(range->start, start); + out->count = out->start < end ? end - out->start : 0; +} + +static inline uint64_t page_range_intersection_size(const PageRange *range, + uint64_t start, uint64_t count) +{ + PageRange trange; + + page_range_intersect(range, start, count, &trange); + return trange.count; +} + +static inline bool page_range_joinable_left(const PageRange *range, + uint64_t start, uint64_t count) +{ + return start + count == range->start; +} + +static inline bool page_range_joinable_right(const PageRange *range, + uint64_t start, uint64_t count) +{ + return range->start + range->count == start; +} + +static inline bool page_range_joinable(const PageRange *range, + uint64_t start, uint64_t count) +{ + return page_range_joinable_left(range, start, count) || + page_range_joinable_right(range, start, count); +} + +/* PageRangeTree */ +/* type safety */ +typedef struct PageRangeTree { + GTree *t; +} PageRangeTree; + +static inline bool page_range_tree_is_empty(PageRangeTree tree) +{ + guint nnodes = g_tree_nnodes(tree.t); + + return nnodes == 0; +} + +void hvb_page_range_tree_init(PageRangeTree *tree); +void hvb_page_range_tree_destroy(PageRangeTree *tree); + +bool hvb_page_range_tree_intree_any(PageRangeTree tree, + uint64_t start, uint64_t count); + +bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out, + uint64_t maxcount); + +void hvb_page_range_tree_insert(PageRangeTree tree, + uint64_t start, uint64_t count, + uint64_t *dupcount); + +#endif diff --git a/hw/hyperv/hv-balloon-stub.c b/hw/hyperv/hv-balloon-stub.c new file mode 100644 index 0000000000..a47412d4a8 --- /dev/null +++ b/hw/hyperv/hv-balloon-stub.c @@ -0,0 +1,19 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-types-machine.h" + +HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp) +{ + error_setg(errp, "hv-balloon device not enabled in this build"); + return NULL; +} diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c new file mode 100644 index 0000000000..66f297c1d7 --- /dev/null +++ b/hw/hyperv/hv-balloon.c @@ -0,0 +1,1769 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hv-balloon-internal.h" + +#include "exec/address-spaces.h" +#include "exec/cpu-common.h" +#include "exec/ramblock.h" +#include "hw/boards.h" +#include "hw/hyperv/dynmem-proto.h" +#include "hw/hyperv/hv-balloon.h" +#include "hw/hyperv/vmbus.h" +#include "hw/mem/memory-device.h" +#include "hw/mem/pc-dimm.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "monitor/qdev.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-events-machine.h" +#include "qapi/qapi-types-machine.h" +#include "qapi/qmp/qdict.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/units.h" +#include "qemu/timer.h" +#include "sysemu/balloon.h" +#include "sysemu/hostmem.h" +#include "sysemu/reset.h" +#include "hv-balloon-our_range_memslots.h" +#include "hv-balloon-page_range_tree.h" +#include "trace.h" + +#define HV_BALLOON_ADDR_PROP "addr" +#define HV_BALLOON_MEMDEV_PROP "memdev" +#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502" + +/* + * Some Windows versions (at least Server 2019) will crash with various + * error codes when receiving DM protocol requests (at least + * DM_MEM_HOT_ADD_REQUEST) immediately after boot. + * + * It looks like Hyper-V from Server 2016 uses a 50-second after-boot + * delay, probably to workaround this issue, so we'll use this value, too. 
+ */ +#define HV_BALLOON_POST_INIT_WAIT (50 * 1000) + +#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB) +#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE) + +#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB) + +#define HV_BALLOON_HR_CHUNK_PAGES 585728 +/* + * ^ that's the maximum number of pages + * that Windows returns in one hot remove response + * + * If the number requested is too high Windows will no longer honor + * these requests + */ + +struct HvBalloonClass { + VMBusDeviceClass parent_class; +} HvBalloonClass; + +typedef enum State { + /* not a real state */ + S_NO_CHANGE = 0, + + S_WAIT_RESET, + S_POST_RESET_CLOSED, + + /* init flow */ + S_VERSION, + S_CAPS, + S_POST_INIT_WAIT, + + S_IDLE, + + /* balloon op flow */ + S_BALLOON_POSTING, + S_BALLOON_RB_WAIT, + S_BALLOON_REPLY_WAIT, + + /* unballoon + hot add ops flow */ + S_UNBALLOON_POSTING, + S_UNBALLOON_RB_WAIT, + S_UNBALLOON_REPLY_WAIT, + S_HOT_ADD_SETUP, + S_HOT_ADD_RB_WAIT, + S_HOT_ADD_POSTING, + S_HOT_ADD_REPLY_WAIT, +} State; + +typedef struct StateDesc { + State state; + const char *desc; +} StateDesc; + +typedef struct HvBalloon { + VMBusDevice parent; + State state; + + union dm_version version; + union dm_caps caps; + + QEMUTimer post_init_timer; + + unsigned int trans_id; + + struct { + bool enabled; + bool received; + uint64_t committed; + uint64_t available; + } status_report; + + /* Guest target size */ + uint64_t target; + bool target_changed; + + /* Current (un)balloon / hot-add operation parameters */ + union { + uint64_t balloon_diff; + + struct { + uint64_t unballoon_diff; + uint64_t hot_add_diff; + }; + + struct { + PageRange hot_add_range; + uint64_t ha_current_count; + }; + }; + + OurRangeMemslots *our_range; + + /* Count of memslots covering our memory */ + unsigned int memslot_count; + + /* Nominal size of each memslot (the last one might be smaller) */ + uint64_t memslot_size; + + /* Non-ours removed memory */ + PageRangeTree removed_guest, removed_both; + + /* Grand totals of removed memory (both ours and non-ours) */ + uint64_t removed_guest_ctr, removed_both_ctr; + + /* MEMORY_DEVICE props */ + uint64_t addr; + HostMemoryBackend *hostmem; + MemoryRegion *mr; +} HvBalloon; + +OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \ + { TYPE_MEMORY_DEVICE }, { }) + +#define HV_BALLOON_SET_STATE(hvb, news) \ + do { \ + assert(news != S_NO_CHANGE); \ + hv_balloon_state_set(hvb, news, # news); \ + } while (0) + +#define HV_BALLOON_STATE_DESC_SET(stdesc, news) \ + _hv_balloon_state_desc_set(stdesc, news, # news) + +#define HV_BALLOON_STATE_DESC_INIT \ + { \ + .state = S_NO_CHANGE, \ + } + +typedef struct HvBalloonReq { + VMBusChanReq vmreq; +} HvBalloonReq; + +/* total our memory includes parts currently removed from the guest */ +static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon) +{ + if (!balloon->our_range) { + return 0; + } + + return balloon->our_range->range.added; +} + +/* TODO: unify the code below with virtio-balloon and cache the value */ +static int build_dimm_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* only realized DIMMs matter */ + *list = g_slist_prepend(*list, dev); + } + } + + object_child_foreach(obj, build_dimm_list, opaque); + return 0; +} + +static ram_addr_t get_current_ram_size(void) +{ + GSList *list = NULL, *item; + ram_addr_t size = current_machine->ram_size; + + 
build_dimm_list(qdev_get_machine(), &list); + for (item = list; item; item = g_slist_next(item)) { + Object *obj = OBJECT(item->data); + if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) + size += object_property_get_int(obj, PC_DIMM_SIZE_PROP, + &error_abort); + } + g_slist_free(list); + + return size; +} + +/* total RAM includes memory currently removed from the guest */ +static uint64_t hv_balloon_total_ram(HvBalloon *balloon) +{ + ram_addr_t ram_size = get_current_ram_size(); + uint64_t ram_size_pages = ram_size >> HV_BALLOON_PFN_SHIFT; + uint64_t our_ram_size_pages = hv_balloon_total_our_ram(balloon); + + assert(ram_size_pages > 0); + + return SUM_SATURATE_U64(ram_size_pages, our_ram_size_pages); +} + +/* + * calculating the total RAM size is a slow operation, + * avoid it as much as possible + */ +static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon, + uint64_t ram_size_pages) +{ + uint64_t total_removed; + + total_removed = SUM_SATURATE_U64(balloon->removed_guest_ctr, + balloon->removed_both_ctr); + + /* possible if guest returns pages outside actual RAM */ + if (total_removed > ram_size_pages) { + total_removed = ram_size_pages; + } + + return total_removed; +} + +/* Returns whether the state has actually changed */ +static bool hv_balloon_state_set(HvBalloon *balloon, + State newst, const char *newststr) +{ + if (newst == S_NO_CHANGE || balloon->state == newst) { + return false; + } + + balloon->state = newst; + trace_hv_balloon_state_change(newststr); + return true; +} + +static void _hv_balloon_state_desc_set(StateDesc *stdesc, + State newst, const char *newststr) +{ + /* state setting is only permitted on a freshly init desc */ + assert(stdesc->state == S_NO_CHANGE); + + assert(newst != S_NO_CHANGE); + + stdesc->state = newst; + stdesc->desc = newststr; +} + +static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon) +{ + return vmbus_device_channel(&balloon->parent, 0); +} + +static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon) +{ + VMBusChannel *chan; + + chan = hv_balloon_get_channel_maybe(balloon); + assert(chan != NULL); + return chan; +} + +static ssize_t hv_balloon_send_packet(VMBusChannel *chan, + struct dm_message *msg) +{ + int ret; + + ret = vmbus_channel_reserve(chan, 0, msg->hdr.size); + if (ret < 0) { + return ret; + } + + return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, msg, msg->hdr.size, false, + msg->hdr.trans_id); +} + +static bool hv_balloon_unballoon_get_source(HvBalloon *balloon, + PageRangeTree *dtree, + uint64_t **dctr, + bool *is_our_range) +{ + OurRange *our_range = OUR_RANGE(balloon->our_range); + + /* Try the boot memory first */ + if (g_tree_nnodes(balloon->removed_guest.t) > 0) { + *dtree = balloon->removed_guest; + *dctr = &balloon->removed_guest_ctr; + *is_our_range = false; + } else if (g_tree_nnodes(balloon->removed_both.t) > 0) { + *dtree = balloon->removed_both; + *dctr = &balloon->removed_both_ctr; + *is_our_range = false; + } else if (!our_range) { + return false; + } else if (!our_range_is_removed_tree_empty(our_range, false)) { + *dtree = our_range_get_removed_tree(our_range, false); + *dctr = &balloon->removed_guest_ctr; + *is_our_range = true; + } else if (!our_range_is_removed_tree_empty(our_range, true)) { + *dtree = our_range_get_removed_tree(our_range, true); + *dctr = &balloon->removed_both_ctr; + *is_our_range = true; + } else { + return false; + } + + return true; +} + +static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = 
hv_balloon_get_channel(balloon); + struct dm_unballoon_request *ur; + size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]); + + assert(balloon->state == S_UNBALLOON_RB_WAIT); + + if (vmbus_channel_reserve(chan, 0, ur_size) < 0) { + return; + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING); +} + +static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + PageRangeTree dtree; + uint64_t *dctr; + bool our_range; + struct dm_unballoon_request *ur; + size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]); + PageRange range; + bool bret; + ssize_t ret; + + assert(balloon->state == S_UNBALLOON_POSTING); + assert(balloon->unballoon_diff > 0); + + if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) { + error_report("trying to unballoon but nothing seems to be ballooned"); + /* + * there is little we can do as we might have already + * sent the guest a partial request we can't cancel + */ + return; + } + + assert(balloon->our_range || !our_range); + assert(dtree.t); + assert(dctr); + + ur = alloca(ur_size); + memset(ur, 0, ur_size); + ur->hdr.type = DM_UNBALLOON_REQUEST; + ur->hdr.size = ur_size; + ur->hdr.trans_id = balloon->trans_id; + + bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff, + HV_BALLOON_HA_CHUNK_PAGES)); + assert(bret); + /* TODO: madvise? */ + + *dctr -= range.count; + balloon->unballoon_diff -= range.count; + + ur->range_count = 1; + ur->range_array[0].finfo.start_page = range.start; + ur->range_array[0].finfo.page_cnt = range.count; + ur->more_pages = balloon->unballoon_diff > 0; + + trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id, + range.count, range.start, + balloon->unballoon_diff); + + if (ur->more_pages) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT); + } else { + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT); + } + + ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, ur, ur_size, false, + ur->hdr.trans_id); + if (ret <= 0) { + error_report("error %zd when posting unballoon msg, expect problems", + ret); + } +} + +static bool hv_balloon_our_range_ensure(HvBalloon *balloon) +{ + uint64_t align; + MemoryRegion *hostmem_mr; + g_autoptr(OurRangeMemslots) our_range_memslots = NULL; + OurRange *our_range; + + if (balloon->our_range) { + return true; + } + + if (!balloon->hostmem) { + return false; + } + + align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB; + assert(QEMU_IS_ALIGNED(balloon->addr, align)); + + hostmem_mr = host_memory_backend_get_memory(balloon->hostmem); + + our_range_memslots = hvb_our_range_memslots_new(balloon->addr, + balloon->mr, hostmem_mr, + OBJECT(balloon), + balloon->memslot_count, + balloon->memslot_size); + our_range = OUR_RANGE(our_range_memslots); + + if (hvb_page_range_tree_intree_any(balloon->removed_guest, + our_range->range.start, + our_range->range.count) || + hvb_page_range_tree_intree_any(balloon->removed_both, + our_range->range.start, + our_range->range.count)) { + error_report("some parts of the memory backend were already returned by the guest. 
this should not happen, please reboot the guest and try again"); + return false; + } + + trace_hv_balloon_our_range_add(our_range->range.count, + our_range->range.start); + + balloon->our_range = g_steal_pointer(&our_range_memslots); + return true; +} + +static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc) +{ + /* need to make copy since it is in union with hot_add_range */ + uint64_t hot_add_diff = balloon->hot_add_diff; + PageRange *hot_add_range = &balloon->hot_add_range; + uint64_t align, our_range_remaining; + OurRange *our_range; + + assert(balloon->state == S_HOT_ADD_SETUP); + assert(hot_add_diff > 0); + + if (!hv_balloon_our_range_ensure(balloon)) { + goto ret_idle; + } + + our_range = OUR_RANGE(balloon->our_range); + + align = (1 << balloon->caps.cap_bits.hot_add_alignment) * + (MiB / HV_BALLOON_PAGE_SIZE); + + /* Absolute GPA in pages */ + hot_add_range->start = our_range_get_remaining_start(our_range); + assert(QEMU_IS_ALIGNED(hot_add_range->start, align)); + + our_range_remaining = our_range_get_remaining_size(our_range); + hot_add_range->count = MIN(our_range_remaining, hot_add_diff); + hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align); + if (hot_add_range->count == 0) { + goto ret_idle; + } + + hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range, + hot_add_range->count); + + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT); + return; + +ret_idle: + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); +} + +static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + struct dm_hot_add *ha; + size_t ha_size = sizeof(*ha) + sizeof(ha->range); + + assert(balloon->state == S_HOT_ADD_RB_WAIT); + + if (vmbus_channel_reserve(chan, 0, ha_size) < 0) { + return; + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING); +} + +static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc) +{ + PageRange *hot_add_range = &balloon->hot_add_range; + uint64_t *current_count = &balloon->ha_current_count; + VMBusChannel *chan = hv_balloon_get_channel(balloon); + struct dm_hot_add *ha; + size_t ha_size = sizeof(*ha) + sizeof(ha->range); + union dm_mem_page_range *ha_region; + uint64_t align, chunk_max_size; + ssize_t ret; + + assert(balloon->state == S_HOT_ADD_POSTING); + assert(hot_add_range->count > 0); + + align = (1 << balloon->caps.cap_bits.hot_add_alignment) * + (MiB / HV_BALLOON_PAGE_SIZE); + if (align >= HV_BALLOON_HA_CHUNK_PAGES) { + /* + * If the required alignment is higher than the chunk size we let it + * override that size. + */ + chunk_max_size = align; + } else { + chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align); + } + + /* + * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(), + * then it is either reduced by subtracting aligned current_count or + * further hot-adds are prevented by marking the whole remaining our range + * as unusable in hv_balloon_handle_hot_add_response(). 
+ */ + *current_count = MIN(hot_add_range->count, chunk_max_size); + + ha = alloca(ha_size); + ha_region = &(&ha->range)[1]; + memset(ha, 0, ha_size); + ha->hdr.type = DM_MEM_HOT_ADD_REQUEST; + ha->hdr.size = ha_size; + ha->hdr.trans_id = balloon->trans_id; + + ha->range.finfo.start_page = hot_add_range->start; + ha->range.finfo.page_cnt = *current_count; + ha_region->finfo.start_page = hot_add_range->start; + ha_region->finfo.page_cnt = ha->range.finfo.page_cnt; + + trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id, + *current_count, hot_add_range->start); + + ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, ha, ha_size, false, + ha->hdr.trans_id); + if (ret <= 0) { + error_report("error %zd when posting hot add msg, expect problems", + ret); + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT); +} + +static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + size_t bl_size = sizeof(struct dm_balloon); + + assert(balloon->state == S_BALLOON_RB_WAIT); + + if (vmbus_channel_reserve(chan, 0, bl_size) < 0) { + return; + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING); +} + +static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + struct dm_balloon bl; + size_t bl_size = sizeof(bl); + ssize_t ret; + + assert(balloon->state == S_BALLOON_POSTING); + assert(balloon->balloon_diff > 0); + + memset(&bl, 0, sizeof(bl)); + bl.hdr.type = DM_BALLOON_REQUEST; + bl.hdr.size = bl_size; + bl.hdr.trans_id = balloon->trans_id; + bl.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES); + + trace_hv_balloon_outgoing_balloon(bl.hdr.trans_id, bl.num_pages, + balloon->balloon_diff); + + ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, &bl, bl_size, false, + bl.hdr.trans_id); + if (ret <= 0) { + error_report("error %zd when posting balloon msg, expect problems", + ret); + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT); +} + +static void hv_balloon_idle_state_process_target(HvBalloon *balloon, + StateDesc *stdesc) +{ + bool can_balloon = balloon->caps.cap_bits.balloon; + uint64_t ram_size_pages, total_removed; + + ram_size_pages = hv_balloon_total_ram(balloon); + total_removed = hv_balloon_total_removed_rs(balloon, ram_size_pages); + + /* + * we need to cache the values computed from the balloon target value when + * starting the adjustment procedure in case someone changes the target when + * the procedure is in progress + */ + if (balloon->target > ram_size_pages - total_removed) { + bool can_hot_add = balloon->caps.cap_bits.hot_add; + uint64_t target_diff = balloon->target - + (ram_size_pages - total_removed); + + balloon->unballoon_diff = MIN(target_diff, total_removed); + + if (can_hot_add) { + balloon->hot_add_diff = target_diff - balloon->unballoon_diff; + } else { + balloon->hot_add_diff = 0; + } + + if (balloon->unballoon_diff > 0) { + assert(can_balloon); + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT); + } else if (balloon->hot_add_diff > 0) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP); + } + } else if (can_balloon && + balloon->target < ram_size_pages - total_removed) { + balloon->balloon_diff = ram_size_pages - total_removed - + balloon->target; + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT); + } +} + +static void hv_balloon_idle_state(HvBalloon *balloon, + StateDesc *stdesc) +{ + assert(balloon->state == S_IDLE); + + if 
(balloon->target_changed) { + balloon->target_changed = false; + hv_balloon_idle_state_process_target(balloon, stdesc); + return; + } +} + +static const struct { + void (*handler)(HvBalloon *balloon, StateDesc *stdesc); +} state_handlers[] = { + [S_IDLE].handler = hv_balloon_idle_state, + [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting, + [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait, + [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting, + [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait, + [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup, + [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait, + [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting, +}; + +static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc) +{ + if (balloon->state >= ARRAY_SIZE(state_handlers) || + !state_handlers[balloon->state].handler) { + return; + } + + state_handlers[balloon->state].handler(balloon, stdesc); +} + +static void hv_balloon_remove_response_insert_range(PageRangeTree tree, + const PageRange *range, + uint64_t *ctr1, + uint64_t *ctr2, + uint64_t *ctr3) +{ + uint64_t dupcount, effcount; + + if (range->count == 0) { + return; + } + + dupcount = 0; + hvb_page_range_tree_insert(tree, range->start, range->count, &dupcount); + + assert(dupcount <= range->count); + effcount = range->count - dupcount; + + *ctr1 += effcount; + *ctr2 += effcount; + if (ctr3) { + *ctr3 += effcount; + } +} + +static void hv_balloon_remove_response_handle_range(HvBalloon *balloon, + PageRange *range, + bool both, + uint64_t *removedctr) +{ + OurRange *our_range = OUR_RANGE(balloon->our_range); + PageRangeTree globaltree = + both ? balloon->removed_both : balloon->removed_guest; + uint64_t *globalctr = + both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr; + PageRange rangeeff; + + if (range->count == 0) { + return; + } + + trace_hv_balloon_remove_response(range->count, range->start, both); + + if (our_range) { + /* Includes the not-yet-hot-added and unusable parts. 
*/ + rangeeff = our_range->range; + } else { + rangeeff.start = rangeeff.count = 0; + } + + if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) { + PageRangeTree ourtree = our_range_get_removed_tree(our_range, both); + PageRange rangehole, rangecommon; + uint64_t ourremoved = 0; + + /* process the hole before our range, if it exists */ + page_range_part_before(range, rangeeff.start, &rangehole); + hv_balloon_remove_response_insert_range(globaltree, &rangehole, + globalctr, removedctr, NULL); + if (rangehole.count > 0) { + trace_hv_balloon_remove_response_hole(rangehole.count, + rangehole.start, + range->count, range->start, + rangeeff.start, both); + } + + /* process our part */ + page_range_intersect(range, rangeeff.start, rangeeff.count, + &rangecommon); + hv_balloon_remove_response_insert_range(ourtree, &rangecommon, + globalctr, removedctr, + &ourremoved); + if (rangecommon.count > 0) { + trace_hv_balloon_remove_response_common(rangecommon.count, + rangecommon.start, + range->count, range->start, + rangeeff.count, + rangeeff.start, ourremoved, + both); + } + + /* calculate what's left after our range */ + rangecommon = *range; + page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count, + range); + } + + /* process the remainder of the range that lies after our range */ + if (range->count > 0) { + hv_balloon_remove_response_insert_range(globaltree, range, + globalctr, removedctr, NULL); + trace_hv_balloon_remove_response_remainder(range->count, range->start, + both); + range->count = 0; + } +} + +static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon, + PageRange *range, + uint64_t start, + uint64_t count, + bool both, + uint64_t *removedctr) +{ + assert(count > 0); + + /* + * if there is an existing range that the new range can't be joined to + * dump it into tree(s) + */ + if (range->count > 0 && !page_range_joinable(range, start, count)) { + hv_balloon_remove_response_handle_range(balloon, range, both, + removedctr); + } + + if (range->count == 0) { + range->start = start; + range->count = count; + } else if (page_range_joinable_left(range, start, count)) { + range->start = start; + range->count += count; + } else { /* page_range_joinable_right() */ + range->count += count; + } +} + +static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key, + gpointer value, + gpointer data) +{ + PageRange *range = value; + uint64_t pageoff; + + for (pageoff = 0; pageoff < range->count; ) { + uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE; + void *addr; + RAMBlock *rb; + ram_addr_t rb_offset; + size_t rb_page_size; + size_t discard_size; + + assert(addr_64 <= UINTPTR_MAX); + addr = (void *)((uintptr_t)addr_64); + rb = qemu_ram_block_from_host(addr, false, &rb_offset); + rb_page_size = qemu_ram_pagesize(rb); + + if (rb_page_size != HV_BALLOON_PAGE_SIZE) { + /* TODO: these should end in "removed_guest" */ + warn_report("guest reported removed page backed by unsupported page size %zu", + rb_page_size); + pageoff++; + continue; + } + + discard_size = MIN(range->count - pageoff, + (rb->max_length - rb_offset) / + HV_BALLOON_PAGE_SIZE); + discard_size = MAX(discard_size, 1); + + if (ram_block_discard_range(rb, rb_offset, discard_size * + HV_BALLOON_PAGE_SIZE) != 0) { + warn_report("guest reported removed page failed discard"); + } + + pageoff += discard_size; + } + + return false; +} + +static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree) +{ + g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, 
NULL); +} + +static int hv_balloon_handle_remove_section(PageRangeTree tree, + const MemoryRegionSection *section, + uint64_t count) +{ + void *addr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + uint64_t addr_page; + + assert(count > 0); + + if ((uintptr_t)addr % HV_BALLOON_PAGE_SIZE) { + warn_report("guest reported removed pages at an unaligned host addr %p", + addr); + return -EINVAL; + } + + addr_page = (uintptr_t)addr / HV_BALLOON_PAGE_SIZE; + hvb_page_range_tree_insert(tree, addr_page, count, NULL); + + return 0; +} + +static void hv_balloon_handle_remove_ranges(HvBalloon *balloon, + union dm_mem_page_range ranges[], + uint32_t count) +{ + uint64_t removedcnt; + PageRangeTree removed_host_addr; + PageRange range_guest, range_both; + + hvb_page_range_tree_init(&removed_host_addr); + range_guest.count = range_both.count = removedcnt = 0; + for (unsigned int ctr = 0; ctr < count; ctr++) { + union dm_mem_page_range *mr = &ranges[ctr]; + hwaddr pa; + MemoryRegionSection section; + + for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) { + int ret; + uint64_t pageno = mr->finfo.start_page + offset; + uint64_t pagecnt = 1; + + pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT; + section = memory_region_find(get_system_memory(), pa, + (mr->finfo.page_cnt - offset) * + HV_BALLOON_PAGE_SIZE); + if (!section.mr) { + warn_report("guest reported removed page %"PRIu64" not found in RAM", + pageno); + ret = -EINVAL; + goto finish_page; + } + + pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE; + if (pagecnt <= 0) { + warn_report("guest reported removed page %"PRIu64" in a section smaller than page size", + pageno); + pagecnt = 1; /* skip the whole page */ + ret = -EINVAL; + goto finish_page; + } + + if (!memory_region_is_ram(section.mr) || + memory_region_is_rom(section.mr) || + memory_region_is_romd(section.mr)) { + warn_report("guest reported removed page %"PRIu64" in a section that is not an ordinary RAM", + pageno); + ret = -EINVAL; + goto finish_page; + } + + ret = hv_balloon_handle_remove_section(removed_host_addr, &section, + pagecnt); + + finish_page: + if (ret == 0) { + hv_balloon_remove_response_handle_pages(balloon, + &range_both, + pageno, pagecnt, + true, &removedcnt); + } else { + hv_balloon_remove_response_handle_pages(balloon, + &range_guest, + pageno, pagecnt, + false, &removedcnt); + } + + if (section.mr) { + memory_region_unref(section.mr); + } + + offset += pagecnt; + } + } + + hv_balloon_remove_response_handle_range(balloon, &range_both, true, + &removedcnt); + hv_balloon_remove_response_handle_range(balloon, &range_guest, false, + &removedcnt); + + hv_balloon_handle_remove_host_addr_tree(removed_host_addr); + hvb_page_range_tree_destroy(&removed_host_addr); + + if (removedcnt > balloon->balloon_diff) { + warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")", + removedcnt, balloon->balloon_diff); + balloon->balloon_diff = 0; + } else { + balloon->balloon_diff -= removedcnt; + } +} + +static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize, + const char *msgname) +{ + VMBusChanReq *vmreq = &req->vmreq; + uint32_t msglen = vmreq->msglen; + + if (msglen >= minsize) { + return true; + } + + warn_report("%s message too short (%u vs %zu), ignoring", msgname, + (unsigned int)msglen, minsize); + return false; +} + +static void hv_balloon_handle_version_request(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_version_request
*msgVr = vmreq->msg; + struct dm_version_response respVr; + + if (balloon->state != S_VERSION) { + warn_report("unexpected DM_VERSION_REQUEST in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgVr), + "DM_VERSION_REQUEST")) { + return; + } + + trace_hv_balloon_incoming_version(msgVr->version.major_version, + msgVr->version.minor_version); + + memset(&respVr, 0, sizeof(respVr)); + respVr.hdr.type = DM_VERSION_RESPONSE; + respVr.hdr.size = sizeof(respVr); + respVr.hdr.trans_id = msgVr->hdr.trans_id; + respVr.is_accepted = msgVr->version.version >= DYNMEM_PROTOCOL_VERSION_1 && + msgVr->version.version <= DYNMEM_PROTOCOL_VERSION_3; + + hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respVr); + + if (respVr.is_accepted) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS); + } +} + +static void hv_balloon_handle_caps_report(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_capabilities *msgCap = vmreq->msg; + struct dm_capabilities_resp_msg respCap; + + if (balloon->state != S_CAPS) { + warn_report("unexpected DM_CAPABILITIES_REPORT in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgCap), + "DM_CAPABILITIES_REPORT")) { + return; + } + + trace_hv_balloon_incoming_caps(msgCap->caps.caps); + balloon->caps = msgCap->caps; + + memset(&respCap, 0, sizeof(respCap)); + respCap.hdr.type = DM_CAPABILITIES_RESPONSE; + respCap.hdr.size = sizeof(respCap); + respCap.hdr.trans_id = msgCap->hdr.trans_id; + respCap.is_accepted = 1; + respCap.hot_remove = 1; + respCap.suppress_pressure_reports = !balloon->status_report.enabled; + hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respCap); + + timer_mod(&balloon->post_init_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + HV_BALLOON_POST_INIT_WAIT); + + HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT); +} + +static void hv_balloon_handle_status_report(HvBalloon *balloon, + HvBalloonReq *req) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_status *msgStatus = vmreq->msg; + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgStatus), + "DM_STATUS_REPORT")) { + return; + } + + if (!balloon->status_report.enabled) { + return; + } + + balloon->status_report.committed = msgStatus->num_committed; + balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE; + balloon->status_report.available = msgStatus->num_avail; + balloon->status_report.available *= HV_BALLOON_PAGE_SIZE; + balloon->status_report.received = true; + + qapi_event_send_hv_balloon_status_report(balloon->status_report.committed, + balloon->status_report.available); +} + +HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp) +{ + HvBalloon *balloon; + HvBalloonInfo *info; + + balloon = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL)); + if (!balloon) { + error_setg(errp, "no %s device present", TYPE_HV_BALLOON); + return NULL; + } + + if (!balloon->status_report.enabled) { + error_setg(errp, "guest memory status reporting not enabled"); + return NULL; + } + + if (!balloon->status_report.received) { + error_setg(errp, "no guest memory status report received yet"); + return NULL; + } + + info = g_malloc0(sizeof(*info)); + info->committed = balloon->status_report.committed; + info->available = balloon->status_report.available; + return info; +} + +static void hv_balloon_handle_unballoon_response(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct 
dm_unballoon_response *msgUrR = vmreq->msg; + + if (balloon->state != S_UNBALLOON_REPLY_WAIT) { + warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR), + "DM_UNBALLOON_RESPONSE")) + return; + + trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id); + + balloon->trans_id++; + + if (balloon->hot_add_diff > 0) { + bool can_hot_add = balloon->caps.cap_bits.hot_add; + + assert(can_hot_add); + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP); + } else { + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); + } +} + +static void hv_balloon_handle_hot_add_response(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + PageRange *hot_add_range = &balloon->hot_add_range; + VMBusChanReq *vmreq = &req->vmreq; + struct dm_hot_add_response *msgHaR = vmreq->msg; + OurRange *our_range; + + if (balloon->state != S_HOT_ADD_REPLY_WAIT) { + warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state", + balloon->state); + return; + } + + assert(balloon->our_range); + our_range = OUR_RANGE(balloon->our_range); + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR), + "DM_HOT_ADD_RESPONSE")) + return; + + trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result, + msgHaR->page_count); + + balloon->trans_id++; + + if (msgHaR->result) { + if (msgHaR->page_count > balloon->ha_current_count) { + warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")", + msgHaR->page_count, balloon->ha_current_count); + msgHaR->page_count = balloon->ha_current_count; + } + + hvb_our_range_mark_added(our_range, msgHaR->page_count); + hot_add_range->start += msgHaR->page_count; + hot_add_range->count -= msgHaR->page_count; + } + + if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) { + /* + * the current planned range was only partially hot-added, take note + * how much of it remains and don't attempt any further hot adds + */ + our_range_mark_remaining_unusable(our_range); + + goto ret_idle; + } + + /* any pages remaining to hot-add in our range? */ + if (hot_add_range->count > 0) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT); + return; + } + +ret_idle: + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); +} + +static void hv_balloon_handle_balloon_response(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_balloon_response *msgBR = vmreq->msg; + + if (balloon->state != S_BALLOON_REPLY_WAIT) { + warn_report("unexpected DM_BALLOON_RESPONSE in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR), + "DM_BALLOON_RESPONSE")) + return; + + trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count, + msgBR->more_pages); + + if (vmreq->msglen < sizeof(*msgBR) + + (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) { + warn_report("DM_BALLOON_RESPONSE too short for the range count"); + return; + } + + if (msgBR->range_count == 0) { + /* The guest is already at its minimum size */ + balloon->balloon_diff = 0; + goto ret_end_trans; + } else { + hv_balloon_handle_remove_ranges(balloon, + msgBR->range_array, + msgBR->range_count); + } + + /* More responses expected? 
*/ + if (msgBR->more_pages) { + return; + } + +ret_end_trans: + balloon->trans_id++; + + if (balloon->balloon_diff > 0) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT); + } else { + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); + } +} + +static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_message *msg = vmreq->msg; + + if (vmreq->msglen < sizeof(msg->hdr)) { + return; + } + + switch (msg->hdr.type) { + case DM_VERSION_REQUEST: + hv_balloon_handle_version_request(balloon, req, stdesc); + break; + + case DM_CAPABILITIES_REPORT: + hv_balloon_handle_caps_report(balloon, req, stdesc); + break; + + case DM_STATUS_REPORT: + hv_balloon_handle_status_report(balloon, req); + break; + + case DM_MEM_HOT_ADD_RESPONSE: + hv_balloon_handle_hot_add_response(balloon, req, stdesc); + break; + + case DM_UNBALLOON_RESPONSE: + hv_balloon_handle_unballoon_response(balloon, req, stdesc); + break; + + case DM_BALLOON_RESPONSE: + hv_balloon_handle_balloon_response(balloon, req, stdesc); + break; + + default: + warn_report("unknown DM message %u", msg->hdr.type); + break; + } +} + +static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan; + HvBalloonReq *req; + + if (balloon->state == S_WAIT_RESET || + balloon->state == S_POST_RESET_CLOSED) { + return false; + } + + chan = hv_balloon_get_channel(balloon); + if (vmbus_channel_recv_start(chan)) { + return false; + } + + while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) { + hv_balloon_handle_packet(balloon, req, stdesc); + vmbus_free_req(req); + vmbus_channel_recv_pop(chan); + + if (stdesc->state != S_NO_CHANGE) { + break; + } + } + + return vmbus_channel_recv_done(chan) > 0; +} + +/* old state handler -> new state transition (potential) */ +static bool hv_balloon_event_loop_state(HvBalloon *balloon) +{ + StateDesc state_new = HV_BALLOON_STATE_DESC_INIT; + + hv_balloon_handle_state(balloon, &state_new); + return hv_balloon_state_set(balloon, state_new.state, state_new.desc); +} + +/* VMBus message -> new state transition (potential) */ +static bool hv_balloon_event_loop_recv(HvBalloon *balloon) +{ + StateDesc state_new = HV_BALLOON_STATE_DESC_INIT; + bool any_recv, state_changed; + + any_recv = hv_balloon_recv_channel(balloon, &state_new); + state_changed = hv_balloon_state_set(balloon, + state_new.state, state_new.desc); + + return state_changed || any_recv; +} + +static void hv_balloon_event_loop(HvBalloon *balloon) +{ + bool state_repeat, recv_repeat; + + do { + state_repeat = hv_balloon_event_loop_state(balloon); + recv_repeat = hv_balloon_event_loop_recv(balloon); + } while (state_repeat || recv_repeat); +} + +static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan) +{ + HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan)); + + hv_balloon_event_loop(balloon); +} + +static void hv_balloon_stat(void *opaque, BalloonInfo *info) +{ + HvBalloon *balloon = opaque; + info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr) + << HV_BALLOON_PFN_SHIFT; +} + +static void hv_balloon_to_target(void *opaque, ram_addr_t target) +{ + HvBalloon *balloon = opaque; + uint64_t target_pages = target >> HV_BALLOON_PFN_SHIFT; + + if (!target_pages) { + return; + } + + /* + * always set target_changed, even with unchanged target, as the user + * might be asking us to try again reaching it + */ + balloon->target = target_pages; + balloon->target_changed = true; + + hv_balloon_event_loop(balloon); +} + 
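Note how the state machine above is driven: a handler never writes balloon->state directly, it only proposes a transition by filling a freshly initialized StateDesc through HV_BALLOON_STATE_DESC_SET(), and hv_balloon_event_loop() keeps re-running the state handler and the channel receive path for as long as either of them made progress. The self-contained sketch below is not part of the patch; every name in it (ExState, ExStateDesc, ex_state_set, ex_idle_handler) is invented purely to illustrate that propose-then-commit loop in isolation.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef enum { EX_NO_CHANGE = 0, EX_IDLE, EX_BUSY } ExState;

typedef struct ExStateDesc { ExState state; const char *desc; } ExStateDesc;

static ExState ex_current = EX_IDLE;

/* commit a proposed transition; returns whether anything actually changed */
static bool ex_state_set(ExState newst, const char *desc)
{
    if (newst == EX_NO_CHANGE || newst == ex_current) {
        return false;
    }
    ex_current = newst;
    printf("-> %s\n", desc);
    return true;
}

/* a handler only *proposes* the next state into a freshly initialized desc */
static void ex_idle_handler(ExStateDesc *stdesc)
{
    assert(stdesc->state == EX_NO_CHANGE);
    stdesc->state = EX_BUSY;
    stdesc->desc = "EX_BUSY";
}

int main(void)
{
    bool progress;

    do {
        ExStateDesc next = { .state = EX_NO_CHANGE };

        if (ex_current == EX_IDLE) {
            ex_idle_handler(&next);
        }
        /* loop again only while some pass still makes progress */
        progress = ex_state_set(next.state, next.desc);
    } while (progress);

    return 0;
}

The real driver follows the same shape, except that a proposal can also come from an incoming VMBus message handled in hv_balloon_recv_channel(), which is why the event loop alternates between the two sources until neither produces a transition.
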
+static int hv_balloon_vmdev_open_channel(VMBusChannel *chan) +{ + HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan)); + + if (balloon->state != S_POST_RESET_CLOSED) { + warn_report("guest trying to open a DM channel in invalid %d state", + balloon->state); + return -EINVAL; + } + + HV_BALLOON_SET_STATE(balloon, S_VERSION); + hv_balloon_event_loop(balloon); + + return 0; +} + +static void hv_balloon_vmdev_close_channel(VMBusChannel *chan) +{ + HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan)); + + timer_del(&balloon->post_init_timer); + + /* Don't report stale data */ + balloon->status_report.received = false; + + HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET); + hv_balloon_event_loop(balloon); +} + +static void hv_balloon_post_init_timer(void *opaque) +{ + HvBalloon *balloon = opaque; + + if (balloon->state != S_POST_INIT_WAIT) { + return; + } + + HV_BALLOON_SET_STATE(balloon, S_IDLE); + hv_balloon_event_loop(balloon); +} + +static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon) +{ + g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free); +} + +static void hv_balloon_system_reset(void *opaque) +{ + HvBalloon *balloon = HV_BALLOON(opaque); + + hv_balloon_system_reset_unrealize_common(balloon); +} + +static void hv_balloon_ensure_mr(HvBalloon *balloon) +{ + MemoryRegion *hostmem_mr; + + assert(balloon->hostmem); + + if (balloon->mr) { + return; + } + + hostmem_mr = host_memory_backend_get_memory(balloon->hostmem); + + balloon->mr = g_new0(MemoryRegion, 1); + memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, + memory_region_size(hostmem_mr)); + + /* + * The VM can indicate an alignment up to 32 GiB. Memory device core can + * usually only handle/guarantee 1 GiB alignment. The user will have to + * specify a larger maxmem eventually. + * + * The memory device core will warn the user in case maxmem might have to be + * increased and will fail plugging the device if there is not sufficient + * space after alignment. + * + * TODO: we could do the alignment ourselves in a slightly bigger region. + * But this feels better, although the warning might be annoying. Maybe + * we can optimize that in the future (e.g., with such a device on the + * cmdline place/size the device memory region differently. + */ + balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); +} + +static void hv_balloon_free_mr(HvBalloon *balloon) +{ + if (!balloon->mr) { + return; + } + + object_unparent(OBJECT(balloon->mr)); + g_clear_pointer(&balloon->mr, g_free); +} + +static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp) +{ + ERRP_GUARD(); + HvBalloon *balloon = HV_BALLOON(vdev); + int ret; + + balloon->state = S_WAIT_RESET; + + ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat, + balloon); + if (ret < 0) { + /* This also protects against having multiple hv-balloon instances */ + error_setg(errp, "Only one balloon device is supported"); + return; + } + + if (balloon->hostmem) { + if (host_memory_backend_is_mapped(balloon->hostmem)) { + Object *obj = OBJECT(balloon->hostmem); + + error_setg(errp, "'%s' property specifies a busy memdev: %s", + HV_BALLOON_MEMDEV_PROP, + object_get_canonical_path_component(obj)); + goto out_balloon_handler; + } + + hv_balloon_ensure_mr(balloon); + + /* This is rather unlikely to happen, but let's still check for it. 
*/ + if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr), + HV_BALLOON_PAGE_SIZE)) { + error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64, + HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE); + goto out_balloon_handler; + } + + host_memory_backend_set_mapped(balloon->hostmem, true); + vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem), + DEVICE(balloon)); + } else if (balloon->addr) { + error_setg(errp, "'%s' property must not be set without a memdev", + HV_BALLOON_MEMDEV_PROP); + goto out_balloon_handler; + } + + timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL, + hv_balloon_post_init_timer, balloon); + + qemu_register_reset(hv_balloon_system_reset, balloon); + + return; + +out_balloon_handler: + qemu_remove_balloon_handler(balloon); +} + +/* + * VMBus device reset has to be implemented in case the guest decides to + * disconnect and reconnect to the VMBus without rebooting the whole system. + * + * However, the hot-added memory can't be removed here as Windows keeps on using + * it until the system is restarted, even after disconnecting from the VMBus. + */ +static void hv_balloon_vmdev_reset(VMBusDevice *vdev) +{ + HvBalloon *balloon = HV_BALLOON(vdev); + + if (balloon->state == S_POST_RESET_CLOSED) { + return; + } + + if (balloon->our_range) { + hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range)); + } + + hvb_page_range_tree_destroy(&balloon->removed_guest); + hvb_page_range_tree_destroy(&balloon->removed_both); + hvb_page_range_tree_init(&balloon->removed_guest); + hvb_page_range_tree_init(&balloon->removed_both); + + balloon->trans_id = 0; + balloon->removed_guest_ctr = 0; + balloon->removed_both_ctr = 0; + + HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED); + hv_balloon_event_loop(balloon); +} + +/* + * Clean up things that were (possibly) allocated pre-realization, for example + * from memory_device_pre_plug(), so we don't leak them if the device don't + * actually get realized in the end. 
+ */ +static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon) +{ + hv_balloon_free_mr(balloon); + balloon->addr = 0; + + balloon->memslot_count = 0; +} + +static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev) +{ + HvBalloon *balloon = HV_BALLOON(vdev); + + qemu_unregister_reset(hv_balloon_system_reset, balloon); + + hv_balloon_system_reset_unrealize_common(balloon); + + qemu_remove_balloon_handler(balloon); + + if (balloon->hostmem) { + vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem), + DEVICE(balloon)); + host_memory_backend_set_mapped(balloon->hostmem, false); + } + + hvb_page_range_tree_destroy(&balloon->removed_guest); + hvb_page_range_tree_destroy(&balloon->removed_both); + + hv_balloon_unrealize_finalize_common(balloon); +} + +static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, + &error_abort); +} + +static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp); +} + +static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + HvBalloon *balloon = HV_BALLOON(md); + + if (!balloon->hostmem) { + return NULL; + } + + hv_balloon_ensure_mr(balloon); + + return balloon->mr; +} + +static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + HvBalloonDeviceInfo *hi = g_new0(HvBalloonDeviceInfo, 1); + const HvBalloon *balloon = HV_BALLOON(md); + DeviceState *dev = DEVICE(md); + + if (dev->id) { + hi->id = g_strdup(dev->id); + } + + if (balloon->hostmem) { + hi->memdev = object_get_canonical_path(OBJECT(balloon->hostmem)); + hi->memaddr = balloon->addr; + hi->has_memaddr = true; + hi->max_size = memory_region_size(balloon->mr); + /* TODO: expose current provided size or something else? */ + } else { + hi->max_size = 0; + } + + info->u.hv_balloon.data = hi; + info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON; +} + +static void hv_balloon_decide_memslots(MemoryDeviceState *md, + unsigned int limit) +{ + HvBalloon *balloon = HV_BALLOON(md); + MemoryRegion *hostmem_mr; + uint64_t region_size, memslot_size, memslots; + + /* We're called exactly once, before realizing the device. */ + assert(!balloon->memslot_count); + + /* We should not be called if we don't have a memory backend */ + assert(balloon->hostmem); + + hostmem_mr = host_memory_backend_get_memory(balloon->hostmem); + region_size = memory_region_size(hostmem_mr); + + assert(region_size > 0); + memslot_size = QEMU_ALIGN_UP(region_size / limit, + HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN); + memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size; + + if (memslots > 1) { + balloon->memslot_size = memslot_size; + } else { + balloon->memslot_size = region_size; + } + + assert(memslots <= UINT_MAX); + balloon->memslot_count = memslots; +} + +static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md) +{ + const HvBalloon *balloon = HV_BALLOON(md); + + /* We're called after setting the suggested limit. 
*/ + assert(balloon->memslot_count > 0); + + return balloon->memslot_count; +} + +static void hv_balloon_init(Object *obj) +{ +} + +static void hv_balloon_finalize(Object *obj) +{ + HvBalloon *balloon = HV_BALLOON(obj); + + hv_balloon_unrealize_finalize_common(balloon); +} + +static Property hv_balloon_properties[] = { + DEFINE_PROP_BOOL("status-report", HvBalloon, + status_report.enabled, false), + + /* MEMORY_DEVICE props */ + DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void hv_balloon_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass); + + device_class_set_props(dc, hv_balloon_properties); + qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid); + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + + vdc->vmdev_realize = hv_balloon_vmdev_realize; + vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize; + vdc->vmdev_reset = hv_balloon_vmdev_reset; + vdc->open_channel = hv_balloon_vmdev_open_channel; + vdc->close_channel = hv_balloon_vmdev_close_channel; + vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify; + + mdc->get_addr = hv_balloon_md_get_addr; + mdc->set_addr = hv_balloon_md_set_addr; + mdc->get_plugged_size = memory_device_get_region_size; + mdc->get_memory_region = hv_balloon_md_get_memory_region; + mdc->decide_memslots = hv_balloon_decide_memslots; + mdc->get_memslots = hv_balloon_get_memslots; + mdc->fill_device_info = hv_balloon_md_fill_device_info; +} diff --git a/hw/hyperv/meson.build b/hw/hyperv/meson.build index b43f119ea5..d3d2668c71 100644 --- a/hw/hyperv/meson.build +++ b/hw/hyperv/meson.build @@ -2,3 +2,4 @@ specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c')) specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c')) specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c')) specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c')) +specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c')) diff --git a/hw/hyperv/trace-events b/hw/hyperv/trace-events index b4c35ca8e3..7963c215b1 100644 --- a/hw/hyperv/trace-events +++ b/hw/hyperv/trace-events @@ -16,3 +16,21 @@ vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d" vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d" vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d" vmbus_close_channel(uint32_t chan_id) "channel #%d" + +# hv-balloon +hv_balloon_state_change(const char *tostr) "-> %s" +hv_balloon_incoming_version(uint16_t major, uint16_t minor) "incoming proto version %u.%u" +hv_balloon_incoming_caps(uint32_t caps) "incoming caps 0x%x" +hv_balloon_outgoing_unballoon(uint32_t trans_id, uint64_t count, uint64_t start, uint64_t rempages) "posting unballoon %"PRIu32" for %"PRIu64" @ 0x%"PRIx64", remaining %"PRIu64 +hv_balloon_incoming_unballoon(uint32_t trans_id) "incoming unballoon response %"PRIu32 +hv_balloon_outgoing_hot_add(uint32_t trans_id, uint64_t count, uint64_t start) "posting hot add %"PRIu32" for %"PRIu64" @ 0x%"PRIx64 +hv_balloon_incoming_hot_add(uint32_t trans_id, uint32_t result, uint32_t count) "incoming hot add response %"PRIu32", result %"PRIu32", count 
%"PRIu32 +hv_balloon_outgoing_balloon(uint32_t trans_id, uint64_t count, uint64_t rempages) "posting balloon %"PRIu32" for %"PRIu64", remaining %"PRIu64 +hv_balloon_incoming_balloon(uint32_t trans_id, uint32_t range_count, uint32_t more_pages) "incoming balloon response %"PRIu32", ranges %"PRIu32", more %"PRIu32 +hv_balloon_our_range_add(uint64_t count, uint64_t start) "adding our range %"PRIu64" @ 0x%"PRIx64 +hv_balloon_remove_response(uint64_t count, uint64_t start, unsigned int both) "processing remove response range %"PRIu64" @ 0x%"PRIx64", both %u" +hv_balloon_remove_response_hole(uint64_t counthole, uint64_t starthole, uint64_t countrange, uint64_t startrange, uint64_t starthpr, unsigned int both) "response range hole %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64", before our start 0x%"PRIx64", both %u" +hv_balloon_remove_response_common(uint64_t countcommon, uint64_t startcommon, uint64_t countrange, uint64_t startrange, uint64_t counthpr, uint64_t starthpr, uint64_t removed, unsigned int both) "response common range %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64" with our %"PRIu64" @ 0x%"PRIx64", removed %"PRIu64", both %u" +hv_balloon_remove_response_remainder(uint64_t count, uint64_t start, unsigned int both) "remove response remaining range %"PRIu64" @ 0x%"PRIx64", both %u" +hv_balloon_map_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "mapping memslot %u / %u @ 0x%"PRIx64 +hv_balloon_unmap_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "unmapping memslot %u / %u @ 0x%"PRIx64 diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 94772c726b..55850791df 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -45,6 +45,7 @@ config PC select ACPI_VMGENID select VIRTIO_PMEM_SUPPORTED select VIRTIO_MEM_SUPPORTED + select HV_BALLOON_SUPPORTED config PC_PCI bool diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index a731738411..b2b4be9983 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -490,6 +490,12 @@ int xen_evtchn_set_callback_param(uint64_t param) break; } + /* If the guest has set a per-vCPU callback vector, prefer that. */ + if (gsi && kvm_xen_has_vcpu_callback_vector()) { + in_kernel = kvm_xen_has_cap(EVTCHN_SEND); + gsi = 0; + } + if (!ret) { /* If vector delivery was turned *off* then tell the kernel */ if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) == @@ -1129,6 +1135,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset) return -ESRCH; } + QEMU_IOTHREAD_LOCK_GUARD(); return xen_evtchn_soft_reset(); } diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 21c30e3659..839ec920a1 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -541,7 +541,5 @@ int xen_gnttab_reset(void) s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); - memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1); - return 0; } diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 660d0b72f9..8e716a7009 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -1357,10 +1357,12 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token) } else { deliver_watch(s, path, token); /* - * If the message was queued because there was already ring activity, - * no need to wake the guest. But if not, we need to send the evtchn. 
+ * Attempt to queue the message into the actual ring, and send + * the event channel notification if any bytes are copied. */ - xen_be_evtchn_notify(s->eh, s->be_port); + if (s->rsp_pending && put_rsp(s) > 0) { + xen_be_evtchn_notify(s->eh, s->be_port); + } } } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 6031234a73..1aef21aa2c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -27,6 +27,7 @@ #include "hw/i386/pc.h" #include "hw/char/serial.h" #include "hw/char/parallel.h" +#include "hw/hyperv/hv-balloon.h" #include "hw/i386/fw_cfg.h" #include "hw/i386/vmport.h" #include "sysemu/cpus.h" @@ -57,6 +58,7 @@ #include "hw/i386/kvm/xen_evtchn.h" #include "hw/i386/kvm/xen_gnttab.h" #include "hw/i386/kvm/xen_xenstore.h" +#include "hw/mem/memory-device.h" #include "e820_memory_layout.h" #include "trace.h" #include CONFIG_DEVICES @@ -1422,6 +1424,21 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev, error_propagate(errp, local_err); } +static void pc_hv_balloon_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* The vmbus handler has no hotplug handler; we should never end up here. */ + g_assert(!dev->hotplugged); + memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, + errp); +} + +static void pc_hv_balloon_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); +} + static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -1452,6 +1469,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, return; } pcms->iommu = dev; + } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) { + pc_hv_balloon_pre_plug(hotplug_dev, dev, errp); } } @@ -1464,6 +1483,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, x86_cpu_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) { + pc_hv_balloon_plug(hotplug_dev, dev, errp); } } @@ -1505,6 +1526,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, object_dynamic_cast(OBJECT(dev), TYPE_CPU) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON) || object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); } diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index ae38f48f16..e0704b8dc3 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -20,6 +20,22 @@ #include "exec/address-spaces.h" #include "trace.h" +static bool memory_device_is_empty(const MemoryDeviceState *md) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + Error *local_err = NULL; + MemoryRegion *mr; + + /* dropping const here is fine as we don't touch the memory region */ + mr = mdc->get_memory_region((MemoryDeviceState *)md, &local_err); + if (local_err) { + /* Not empty, we'll report errors later when obtaining the MR again.
*/ + error_free(local_err); + return false; + } + return !mr; +} + static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) { const MemoryDeviceState *md_a = MEMORY_DEVICE(a); @@ -220,12 +236,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, return 0; } - if (!QEMU_IS_ALIGNED(size, align)) { - error_setg(errp, "backend memory size must be multiple of 0x%" - PRIx64, align); - return 0; - } - if (hint) { if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) { error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64 @@ -249,6 +259,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, uint64_t next_addr; Range tmp; + if (memory_device_is_empty(md)) { + continue; + } + range_init_nofail(&tmp, mdc->get_addr(md), memory_device_get_region_size(md, &error_abort)); @@ -292,6 +306,7 @@ MemoryDeviceInfoList *qmp_memory_device_list(void) const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data); MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + /* Let's query information even for empty memory devices. */ mdc->fill_device_info(md, info); QAPI_LIST_APPEND(tail, info); @@ -311,7 +326,7 @@ static int memory_device_plugged_size(Object *obj, void *opaque) const MemoryDeviceState *md = MEMORY_DEVICE(obj); const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj); - if (dev->realized) { + if (dev->realized && !memory_device_is_empty(md)) { *size += mdc->get_plugged_size(md, &error_abort); } } @@ -337,6 +352,11 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, uint64_t addr, align = 0; MemoryRegion *mr; + /* We support empty memory devices even without device memory. */ + if (memory_device_is_empty(md)) { + return; + } + if (!ms->device_memory) { error_setg(errp, "the configuration is not prepared for memory devices" " (e.g., for memory hotplug), consider specifying the" @@ -380,10 +400,17 @@ out: void memory_device_plug(MemoryDeviceState *md, MachineState *ms) { const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); - const unsigned int memslots = memory_device_get_memslots(md); - const uint64_t addr = mdc->get_addr(md); + unsigned int memslots; + uint64_t addr; MemoryRegion *mr; + if (memory_device_is_empty(md)) { + return; + } + + memslots = memory_device_get_memslots(md); + addr = mdc->get_addr(md); + /* * We expect that a previous call to memory_device_pre_plug() succeeded, so * it can't fail at this point. @@ -408,6 +435,10 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) const unsigned int memslots = memory_device_get_memslots(md); MemoryRegion *mr; + if (memory_device_is_empty(md)) { + return; + } + /* * We expect that a previous call to memory_device_pre_plug() succeeded, so * it can't fail at this point.
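The memory_device_is_empty() helper above relies on the get_memory_region() callback contract: a device that currently owns no memory region returns NULL without setting an error, whereas a misconfigured device sets an error (as the virtio-pmem change that follows does for a missing memdev). A minimal sketch of that contract for a hypothetical device type, illustrative only and not part of this series (FooDevice, FOO_DEVICE and its memdev property are assumptions):

/* Hypothetical example of the MemoryDeviceClass::get_memory_region() contract. */
static MemoryRegion *foo_device_get_memory_region(MemoryDeviceState *md,
                                                  Error **errp)
{
    FooDevice *foo = FOO_DEVICE(md);    /* assumed device type */

    if (!foo->memdev) {
        /* Completely empty device: no memory region and no error. */
        return NULL;
    }
    /* Non-empty device: hand back the region of the backing memdev. */
    return &foo->memdev->mr;
}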
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index cc24812d2e..c3512c2dae 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -147,7 +147,10 @@ static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem, static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem, Error **errp) { - assert(pmem->memdev); + if (!pmem->memdev) { + error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP); + return NULL; + } return &pmem->memdev->mr; } diff --git a/include/hw/hyperv/dynmem-proto.h b/include/hw/hyperv/dynmem-proto.h new file mode 100644 index 0000000000..d0f9090ac4 --- /dev/null +++ b/include/hw/hyperv/dynmem-proto.h @@ -0,0 +1,423 @@ +#ifndef HW_HYPERV_DYNMEM_PROTO_H +#define HW_HYPERV_DYNMEM_PROTO_H + +/* + * Hyper-V Dynamic Memory Protocol definitions + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * Based on drivers/hv/hv_balloon.c from Linux kernel: + * Copyright (c) 2012, Microsoft Corporation. + * + * Author: K. Y. Srinivasan <kys@microsoft.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +/* + * Protocol versions. The low word is the minor version, the high word the major + * version. + * + * History: + * Initial version 1.0 + * Changed to 0.1 on 2009/03/25 + * Changes to 0.2 on 2009/05/14 + * Changes to 0.3 on 2009/12/03 + * Changed to 1.0 on 2011/04/05 + * Changed to 2.0 on 2019/12/10 + */ + +#define DYNMEM_MAKE_VERSION(Major, Minor) ((uint32_t)(((Major) << 16) | (Minor))) +#define DYNMEM_MAJOR_VERSION(Version) ((uint32_t)(Version) >> 16) +#define DYNMEM_MINOR_VERSION(Version) ((uint32_t)(Version) & 0xff) + +enum { + DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), + DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), + DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0), + + DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, + DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, + DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3, + + DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 +}; + + + +/* + * Message Types + */ + +enum dm_message_type { + /* + * Version 0.3 + */ + DM_ERROR = 0, + DM_VERSION_REQUEST = 1, + DM_VERSION_RESPONSE = 2, + DM_CAPABILITIES_REPORT = 3, + DM_CAPABILITIES_RESPONSE = 4, + DM_STATUS_REPORT = 5, + DM_BALLOON_REQUEST = 6, + DM_BALLOON_RESPONSE = 7, + DM_UNBALLOON_REQUEST = 8, + DM_UNBALLOON_RESPONSE = 9, + DM_MEM_HOT_ADD_REQUEST = 10, + DM_MEM_HOT_ADD_RESPONSE = 11, + DM_VERSION_03_MAX = 11, + /* + * Version 1.0. + */ + DM_INFO_MESSAGE = 12, + DM_VERSION_1_MAX = 12, + + /* + * Version 2.0 + */ + DM_MEM_HOT_REMOVE_REQUEST = 13, + DM_MEM_HOT_REMOVE_RESPONSE = 14 +}; + + +/* + * Structures defining the dynamic memory management + * protocol. + */ + +union dm_version { + struct { + uint16_t minor_version; + uint16_t major_version; + }; + uint32_t version; +} QEMU_PACKED; + + +union dm_caps { + struct { + uint64_t balloon:1; + uint64_t hot_add:1; + /* + * To support guests that may have alignment + * limitations on hot-add, the guest can specify + * its alignment requirements; a value of n + * represents an alignment of 2^n in mega bytes. + */ + uint64_t hot_add_alignment:4; + uint64_t hot_remove:1; + uint64_t reservedz:57; + } cap_bits; + uint64_t caps; +} QEMU_PACKED; + +union dm_mem_page_range { + struct { + /* + * The PFN number of the first page in the range. + * 40 bits is the architectural limit of a PFN + * number for AMD64. 
+ */ + uint64_t start_page:40; + /* + * The number of pages in the range. + */ + uint64_t page_cnt:24; + } finfo; + uint64_t page_range; +} QEMU_PACKED; + + + +/* + * The header for all dynamic memory messages: + * + * type: Type of the message. + * size: Size of the message in bytes; including the header. + * trans_id: The guest is responsible for manufacturing this ID. + */ + +struct dm_header { + uint16_t type; + uint16_t size; + uint32_t trans_id; +} QEMU_PACKED; + +/* + * A generic message format for dynamic memory. + * Specific message formats are defined later in the file. + */ + +struct dm_message { + struct dm_header hdr; + uint8_t data[]; /* enclosed message */ +} QEMU_PACKED; + + +/* + * Specific message types supporting the dynamic memory protocol. + */ + +/* + * Version negotiation message. Sent from the guest to the host. + * The guest is free to try different versions until the host + * accepts the version. + * + * dm_version: The protocol version requested. + * is_last_attempt: If TRUE, this is the last version guest will request. + * reservedz: Reserved field, set to zero. + */ + +struct dm_version_request { + struct dm_header hdr; + union dm_version version; + uint32_t is_last_attempt:1; + uint32_t reservedz:31; +} QEMU_PACKED; + +/* + * Version response message; Host to Guest and indicates + * if the host has accepted the version sent by the guest. + * + * is_accepted: If TRUE, host has accepted the version and the guest + * should proceed to the next stage of the protocol. FALSE indicates that + * guest should re-try with a different version. + * + * reservedz: Reserved field, set to zero. + */ + +struct dm_version_response { + struct dm_header hdr; + uint64_t is_accepted:1; + uint64_t reservedz:63; +} QEMU_PACKED; + +/* + * Message reporting capabilities. This is sent from the guest to the + * host. + */ + +struct dm_capabilities { + struct dm_header hdr; + union dm_caps caps; + uint64_t min_page_cnt; + uint64_t max_page_number; +} QEMU_PACKED; + +/* + * Response to the capabilities message. This is sent from the host to the + * guest. This message notifies if the host has accepted the guest's + * capabilities. If the host has not accepted, the guest must shutdown + * the service. + * + * is_accepted: Indicates if the host has accepted guest's capabilities. + * reservedz: Must be 0. + */ + +struct dm_capabilities_resp_msg { + struct dm_header hdr; + uint64_t is_accepted:1; + uint64_t hot_remove:1; + uint64_t suppress_pressure_reports:1; + uint64_t reservedz:61; +} QEMU_PACKED; + +/* + * This message is used to report memory pressure from the guest. + * This message is not part of any transaction and there is no + * response to this message. + * + * num_avail: Available memory in pages. + * num_committed: Committed memory in pages. + * page_file_size: The accumulated size of all page files + * in the system in pages. + * zero_free: The number of zero and free pages. + * page_file_writes: The writes to the page file in pages. + * io_diff: An indicator of file cache efficiency or page file activity, + * calculated as File Cache Page Fault Count - Page Read Count. + * This value is in pages. + * + * Some of these metrics are Windows specific and fortunately + * the algorithm on the host side that computes the guest memory + * pressure only uses num_committed value.
+ */ + +struct dm_status { + struct dm_header hdr; + uint64_t num_avail; + uint64_t num_committed; + uint64_t page_file_size; + uint64_t zero_free; + uint32_t page_file_writes; + uint32_t io_diff; +} QEMU_PACKED; + + +/* + * Message to ask the guest to allocate memory - balloon up message. + * This message is sent from the host to the guest. The guest may not be + * able to allocate as much memory as requested. + * + * num_pages: number of pages to allocate. + */ + +struct dm_balloon { + struct dm_header hdr; + uint32_t num_pages; + uint32_t reservedz; +} QEMU_PACKED; + + +/* + * Balloon response message; this message is sent from the guest + * to the host in response to the balloon message. + * + * reservedz: Reserved; must be set to zero. + * more_pages: If FALSE, this is the last message of the transaction. + * if TRUE there will be at least one more message from the guest. + * + * range_count: The number of ranges in the range array. + * + * range_array: An array of page ranges returned to the host. + * + */ + +struct dm_balloon_response { + struct dm_header hdr; + uint32_t reservedz; + uint32_t more_pages:1; + uint32_t range_count:31; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +/* + * Un-balloon message; this message is sent from the host + * to the guest to give guest more memory. + * + * more_pages: If FALSE, this is the last message of the transaction. + * if TRUE there will be at least one more message from the guest. + * + * reservedz: Reserved; must be set to zero. + * + * range_count: The number of ranges in the range array. + * + * range_array: An array of page ranges returned to the host. + * + */ + +struct dm_unballoon_request { + struct dm_header hdr; + uint32_t more_pages:1; + uint32_t reservedz:31; + uint32_t range_count; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +/* + * Un-balloon response message; this message is sent from the guest + * to the host in response to an unballoon request. + * + */ + +struct dm_unballoon_response { + struct dm_header hdr; +} QEMU_PACKED; + + +/* + * Hot add request message. Message sent from the host to the guest. + * + * mem_range: Memory range to hot add. + * + */ + +struct dm_hot_add { + struct dm_header hdr; + union dm_mem_page_range range; +} QEMU_PACKED; + +/* + * Hot add response message. + * This message is sent by the guest to report the status of a hot add request. + * If page_count is less than the requested page count, then the host should + * assume all further hot add requests will fail, since this indicates that + * the guest has hit an upper physical memory barrier. + * + * Hot adds may also fail due to low resources; in this case, the guest must + * not complete this message until the hot add can succeed, and the host must + * not send a new hot add request until the response is sent. + * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS + * times it fails the request. + * + * + * page_count: number of pages that were successfully hot added. + * + * result: result of the operation 1: success, 0: failure.
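For illustration only (not part of this patch): the reply described above maps onto the dm_hot_add_response structure defined just below; a guest-side sketch of filling it, assuming the definitions from this header are in scope and fill_hot_add_response is a hypothetical helper, might look like this:

/* Illustrative sketch: compose a DM_MEM_HOT_ADD_RESPONSE message. */
static void fill_hot_add_response(struct dm_hot_add_response *resp,
                                  uint32_t trans_id, uint32_t added_pages,
                                  uint32_t result)
{
    resp->hdr.type = DM_MEM_HOT_ADD_RESPONSE;
    resp->hdr.size = sizeof(*resp);
    resp->hdr.trans_id = trans_id;   /* guest-manufactured ID, see dm_header */
    resp->page_count = added_pages;  /* pages that were actually hot added */
    resp->result = result;           /* 1: success, 0: failure */
}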
+ * + */ + +struct dm_hot_add_response { + struct dm_header hdr; + uint32_t page_count; + uint32_t result; +} QEMU_PACKED; + +struct dm_hot_remove { + struct dm_header hdr; + uint32_t virtual_node; + uint32_t page_count; + uint32_t qos_flags; + uint32_t reservedZ; +} QEMU_PACKED; + +struct dm_hot_remove_response { + struct dm_header hdr; + uint32_t result; + uint32_t range_count; + uint64_t more_pages:1; + uint64_t reservedz:63; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +#define DM_REMOVE_QOS_LARGE (1 << 0) +#define DM_REMOVE_QOS_LOCAL (1 << 1) +#define DM_REMOVE_QOS_MASK (0x3) + +/* + * Types of information sent from host to the guest. + */ + +enum dm_info_type { + INFO_TYPE_MAX_PAGE_CNT = 0, + MAX_INFO_TYPE +}; + + +/* + * Header for the information message. + */ + +struct dm_info_header { + enum dm_info_type type; + uint32_t data_size; + uint8_t data[]; +} QEMU_PACKED; + +/* + * This message is sent from the host to the guest to pass + * some relevant information (win8 addition). + * + * reserved: not used. + * info_size: size of the information blob. + * info: information blob. + */ + +struct dm_info_msg { + struct dm_header hdr; + uint32_t reserved; + uint32_t info_size; + uint8_t info[]; +}; + +#endif diff --git a/include/hw/hyperv/hv-balloon.h b/include/hw/hyperv/hv-balloon.h new file mode 100644 index 0000000000..c1efe70fc2 --- /dev/null +++ b/include/hw/hyperv/hv-balloon.h @@ -0,0 +1,18 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HV_BALLOON_H +#define HW_HV_BALLOON_H + +#include "qom/object.h" + +#define TYPE_HV_BALLOON "hv-balloon" +OBJECT_DECLARE_SIMPLE_TYPE(HvBalloon, HV_BALLOON) + +#endif diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h index 3354d6c166..a1d62cc551 100644 --- a/include/hw/mem/memory-device.h +++ b/include/hw/mem/memory-device.h @@ -38,6 +38,10 @@ typedef struct MemoryDeviceState MemoryDeviceState; * address in guest physical memory can either be specified explicitly * or get assigned automatically. * + * Some memory devices might not own a memory region in certain device + * configurations. Such devices can logically get (un)plugged, however, + * empty memory devices are mostly ignored by the memory device code. + * * Conceptually, memory devices only span one memory region. If multiple * successive memory regions are used, a covering memory region has to * be provided. Scattered memory regions are not supported for single @@ -91,7 +95,8 @@ struct MemoryDeviceClass { uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp); /* - * Return the memory region of the memory device. + * Return the memory region of the memory device. If the device is + * completely empty, returns NULL without an error.
* * Called when (un)plugging the memory device, to (un)map the * memory region in guest physical memory, but also to detect the diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h index 7008d43d04..d702854853 100644 --- a/include/sysemu/dump.h +++ b/include/sysemu/dump.h @@ -137,7 +137,7 @@ typedef struct QEMU_PACKED KdumpSubHeader64 { } KdumpSubHeader64; typedef struct DataCache { - int fd; /* fd of the file where to write the cached data */ + DumpState *state; /* dump state related to this data */ uint8_t *buf; /* buffer for cached data */ size_t buf_size; /* size of the buf */ size_t data_size; /* size of cached data in buf */ @@ -157,6 +157,7 @@ typedef struct DumpState { MemoryMappingList list; bool resume; bool detached; + bool kdump_raw; hwaddr memory_offset; int fd; diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h index 595abfbe40..961c702c4e 100644 --- a/include/sysemu/kvm_xen.h +++ b/include/sysemu/kvm_xen.h @@ -22,6 +22,7 @@ int kvm_xen_soft_reset(void); uint32_t kvm_xen_get_caps(void); void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); +bool kvm_xen_has_vcpu_callback_vector(void); void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type); void kvm_xen_set_callback_asserted(void); int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port); diff --git a/linux-user/loongarch64/cpu_loop.c b/linux-user/loongarch64/cpu_loop.c index 894fdd111a..73d7b6796a 100644 --- a/linux-user/loongarch64/cpu_loop.c +++ b/linux-user/loongarch64/cpu_loop.c @@ -72,6 +72,19 @@ void cpu_loop(CPULoongArchState *env) case EXCCODE_BCE: force_sig_fault(TARGET_SIGSYS, TARGET_SI_KERNEL, env->pc); break; + + /* + * Begin with LSX and LASX disabled, then enable on the first trap. + * In this way we can tell if the unit is in use. This is used to + * choose the layout of any signal frame. 
+ */ + case EXCCODE_SXD: + env->CSR_EUEN |= R_CSR_EUEN_SXE_MASK; + break; + case EXCCODE_ASXD: + env->CSR_EUEN |= R_CSR_EUEN_ASXE_MASK; + break; + case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c index afcee641a6..39ea82c814 100644 --- a/linux-user/loongarch64/signal.c +++ b/linux-user/loongarch64/signal.c @@ -18,10 +18,10 @@ #define SC_USED_FP (1 << 0) struct target_sigcontext { - uint64_t sc_pc; - uint64_t sc_regs[32]; - uint32_t sc_flags; - uint64_t sc_extcontext[0] QEMU_ALIGNED(16); + abi_ulong sc_pc; + abi_ulong sc_regs[32]; + abi_uint sc_flags; + abi_ulong sc_extcontext[0] QEMU_ALIGNED(16); }; QEMU_BUILD_BUG_ON(sizeof(struct target_sigcontext) != sizeof_sigcontext); @@ -33,19 +33,35 @@ QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_regs) #define FPU_CTX_MAGIC 0x46505501 #define FPU_CTX_ALIGN 8 struct target_fpu_context { - uint64_t regs[32]; - uint64_t fcc; - uint32_t fcsr; + abi_ulong regs[32]; + abi_ulong fcc; + abi_uint fcsr; } QEMU_ALIGNED(FPU_CTX_ALIGN); QEMU_BUILD_BUG_ON(offsetof(struct target_fpu_context, regs) != offsetof_fpucontext_fr); +#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct target_lsx_context { + abi_ulong regs[2 * 32]; + abi_ulong fcc; + abi_uint fcsr; +} QEMU_ALIGNED(LSX_CTX_ALIGN); + +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct target_lasx_context { + abi_ulong regs[4 * 32]; + abi_ulong fcc; + abi_uint fcsr; +} QEMU_ALIGNED(LASX_CTX_ALIGN); + #define CONTEXT_INFO_ALIGN 16 struct target_sctx_info { - uint32_t magic; - uint32_t size; - uint64_t padding; + abi_uint magic; + abi_uint size; + abi_ulong padding; } QEMU_ALIGNED(CONTEXT_INFO_ALIGN); QEMU_BUILD_BUG_ON(sizeof(struct target_sctx_info) != sizeof_sctx_info); @@ -81,9 +97,11 @@ struct ctx_layout { }; struct extctx_layout { - unsigned int size; + unsigned long size; unsigned int flags; struct ctx_layout fpu; + struct ctx_layout lsx; + struct ctx_layout lasx; struct ctx_layout end; }; @@ -105,7 +123,8 @@ static abi_ptr extframe_alloc(struct extctx_layout *extctx, return sp; } -static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp) +static abi_ptr setup_extcontext(CPULoongArchState *env, + struct extctx_layout *extctx, abi_ptr sp) { memset(extctx, 0, sizeof(struct extctx_layout)); @@ -114,8 +133,17 @@ static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp) /* For qemu, there is no lazy fp context switch, so fp always present. 
*/ extctx->flags = SC_USED_FP; - sp = extframe_alloc(extctx, &extctx->fpu, - sizeof(struct target_rt_sigframe), FPU_CTX_ALIGN, sp); + + if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) { + sp = extframe_alloc(extctx, &extctx->lasx, + sizeof(struct target_lasx_context), LASX_CTX_ALIGN, sp); + } else if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE)) { + sp = extframe_alloc(extctx, &extctx->lsx, + sizeof(struct target_lsx_context), LSX_CTX_ALIGN, sp); + } else { + sp = extframe_alloc(extctx, &extctx->fpu, + sizeof(struct target_fpu_context), FPU_CTX_ALIGN, sp); + } return sp; } @@ -125,7 +153,6 @@ static void setup_sigframe(CPULoongArchState *env, struct extctx_layout *extctx) { struct target_sctx_info *info; - struct target_fpu_context *fpu_ctx; int i; __put_user(extctx->flags, &sc->sc_flags); @@ -136,25 +163,63 @@ static void setup_sigframe(CPULoongArchState *env, } /* - * Set fpu context + * Set extension context */ - info = extctx->fpu.haddr; - __put_user(FPU_CTX_MAGIC, &info->magic); - __put_user(extctx->fpu.size, &info->size); - fpu_ctx = (struct target_fpu_context *)(info + 1); - for (i = 0; i < 32; ++i) { - __put_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]); + if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) { + struct target_lasx_context *lasx_ctx; + info = extctx->lasx.haddr; + + __put_user(LASX_CTX_MAGIC, &info->magic); + __put_user(extctx->lasx.size, &info->size); + + lasx_ctx = (struct target_lasx_context *)(info + 1); + + for (i = 0; i < 32; ++i) { + __put_user(env->fpr[i].vreg.UD(0), &lasx_ctx->regs[4 * i]); + __put_user(env->fpr[i].vreg.UD(1), &lasx_ctx->regs[4 * i + 1]); + __put_user(env->fpr[i].vreg.UD(2), &lasx_ctx->regs[4 * i + 2]); + __put_user(env->fpr[i].vreg.UD(3), &lasx_ctx->regs[4 * i + 3]); + } + __put_user(read_fcc(env), &lasx_ctx->fcc); + __put_user(env->fcsr0, &lasx_ctx->fcsr); + } else if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE)) { + struct target_lsx_context *lsx_ctx; + info = extctx->lsx.haddr; + + __put_user(LSX_CTX_MAGIC, &info->magic); + __put_user(extctx->lsx.size, &info->size); + + lsx_ctx = (struct target_lsx_context *)(info + 1); + + for (i = 0; i < 32; ++i) { + __put_user(env->fpr[i].vreg.UD(0), &lsx_ctx->regs[2 * i]); + __put_user(env->fpr[i].vreg.UD(1), &lsx_ctx->regs[2 * i + 1]); + } + __put_user(read_fcc(env), &lsx_ctx->fcc); + __put_user(env->fcsr0, &lsx_ctx->fcsr); + } else { + struct target_fpu_context *fpu_ctx; + info = extctx->fpu.haddr; + + __put_user(FPU_CTX_MAGIC, &info->magic); + __put_user(extctx->fpu.size, &info->size); + + fpu_ctx = (struct target_fpu_context *)(info + 1); + + for (i = 0; i < 32; ++i) { + __put_user(env->fpr[i].vreg.UD(0), &fpu_ctx->regs[i]); + } + __put_user(read_fcc(env), &fpu_ctx->fcc); + __put_user(env->fcsr0, &fpu_ctx->fcsr); } - __put_user(read_fcc(env), &fpu_ctx->fcc); - __put_user(env->fcsr0, &fpu_ctx->fcsr); /* * Set end context */ info = extctx->end.haddr; __put_user(0, &info->magic); - __put_user(extctx->end.size, &info->size); + __put_user(0, &info->size); } static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame) @@ -162,7 +227,7 @@ static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame) memset(extctx, 0, sizeof(*extctx)); while (1) { - uint32_t magic, size; + abi_uint magic, size; if (get_user_u32(magic, frame) || get_user_u32(size, frame + 4)) { return false; @@ -184,6 +249,24 @@ static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame) extctx->fpu.size = size; extctx->size += size; break; + case LSX_CTX_MAGIC: + if (size < (sizeof(struct target_sctx_info) + + 
sizeof(struct target_lsx_context))) { + return false; + } + extctx->lsx.gaddr = frame; + extctx->lsx.size = size; + extctx->size += size; + break; + case LASX_CTX_MAGIC: + if (size < (sizeof(struct target_sctx_info) + + sizeof(struct target_lasx_context))) { + return false; + } + extctx->lasx.gaddr = frame; + extctx->lasx.size = size; + extctx->size += size; + break; default: return false; } @@ -197,19 +280,45 @@ static void restore_sigframe(CPULoongArchState *env, struct extctx_layout *extctx) { int i; + abi_ulong fcc; __get_user(env->pc, &sc->sc_pc); for (i = 1; i < 32; ++i) { __get_user(env->gpr[i], &sc->sc_regs[i]); } - if (extctx->fpu.haddr) { + if (extctx->lasx.haddr) { + struct target_lasx_context *lasx_ctx = + extctx->lasx.haddr + sizeof(struct target_sctx_info); + + for (i = 0; i < 32; ++i) { + __get_user(env->fpr[i].vreg.UD(0), &lasx_ctx->regs[4 * i]); + __get_user(env->fpr[i].vreg.UD(1), &lasx_ctx->regs[4 * i + 1]); + __get_user(env->fpr[i].vreg.UD(2), &lasx_ctx->regs[4 * i + 2]); + __get_user(env->fpr[i].vreg.UD(3), &lasx_ctx->regs[4 * i + 3]); + } + __get_user(fcc, &lasx_ctx->fcc); + write_fcc(env, fcc); + __get_user(env->fcsr0, &lasx_ctx->fcsr); + restore_fp_status(env); + } else if (extctx->lsx.haddr) { + struct target_lsx_context *lsx_ctx = + extctx->lsx.haddr + sizeof(struct target_sctx_info); + + for (i = 0; i < 32; ++i) { + __get_user(env->fpr[i].vreg.UD(0), &lsx_ctx->regs[2 * i]); + __get_user(env->fpr[i].vreg.UD(1), &lsx_ctx->regs[2 * i + 1]); + } + __get_user(fcc, &lsx_ctx->fcc); + write_fcc(env, fcc); + __get_user(env->fcsr0, &lsx_ctx->fcsr); + restore_fp_status(env); + } else if (extctx->fpu.haddr) { struct target_fpu_context *fpu_ctx = extctx->fpu.haddr + sizeof(struct target_sctx_info); - uint64_t fcc; for (i = 0; i < 32; ++i) { - __get_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]); + __get_user(env->fpr[i].vreg.UD(0), &fpu_ctx->regs[i]); } __get_user(fcc, &fpu_ctx->fcc); write_fcc(env, fcc); @@ -229,7 +338,7 @@ static abi_ptr get_sigframe(struct target_sigaction *ka, sp = target_sigsp(get_sp_from_cpustate(env), ka); sp = ROUND_DOWN(sp, 16); - sp = setup_extcontext(extctx, sp); + sp = setup_extcontext(env, extctx, sp); sp -= sizeof(struct target_rt_sigframe); assert(QEMU_IS_ALIGNED(sp, 16)); @@ -255,8 +364,17 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, force_sigsegv(sig); return; } - extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr); - extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + + if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) { + extctx.lasx.haddr = (void *)frame + (extctx.lasx.gaddr - frame_addr); + extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + } else if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE)) { + extctx.lsx.haddr = (void *)frame + (extctx.lsx.gaddr - frame_addr); + extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + } else { + extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr); + extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + } tswap_siginfo(&frame->rs_info, info); @@ -299,7 +417,12 @@ long do_rt_sigreturn(CPULoongArchState *env) if (!frame) { goto badframe; } - if (extctx.fpu.gaddr) { + + if (extctx.lasx.gaddr) { + extctx.lasx.haddr = (void *)frame + (extctx.lasx.gaddr - frame_addr); + } else if (extctx.lsx.gaddr) { + extctx.lsx.haddr = (void *)frame + (extctx.lsx.gaddr - frame_addr); + } else if (extctx.fpu.gaddr) { extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr); } diff --git 
a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c index b36bb2574b..3c1bde00dd 100644 --- a/linux-user/sparc/cpu_loop.c +++ b/linux-user/sparc/cpu_loop.c @@ -197,10 +197,8 @@ static uint32_t do_getpsr(CPUSPARCState *env) /* Avoid ifdefs below for the abi32 and abi64 paths. */ #ifdef TARGET_ABI32 #define TARGET_TT_SYSCALL (TT_TRAP + 0x10) /* t_linux */ -#define syscall_cc psr #else #define TARGET_TT_SYSCALL (TT_TRAP + 0x6d) /* tl0_linux64 */ -#define syscall_cc xcc #endif /* Avoid ifdefs below for the v9 and pre-v9 hw traps. */ @@ -224,11 +222,6 @@ void cpu_loop (CPUSPARCState *env) cpu_exec_end(cs); process_queued_cpu_work(cs); - /* Compute PSR before exposing state. */ - if (env->cc_op != CC_OP_FLAGS) { - cpu_get_psr(env); - } - switch (trapnr) { case TARGET_TT_SYSCALL: ret = do_syscall (env, env->gregs[1], @@ -240,10 +233,10 @@ void cpu_loop (CPUSPARCState *env) break; } if ((abi_ulong)ret >= (abi_ulong)(-515)) { - env->syscall_cc |= PSR_CARRY; + set_syscall_C(env, 1); ret = -ret; } else { - env->syscall_cc &= ~PSR_CARRY; + set_syscall_C(env, 0); } env->regwptr[0] = ret; /* next instruction */ diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c index 2be9000b9e..dfcae707e0 100644 --- a/linux-user/sparc/signal.c +++ b/linux-user/sparc/signal.c @@ -164,7 +164,7 @@ static void restore_pt_regs(struct target_pt_regs *regs, CPUSPARCState *env) */ uint32_t psr; __get_user(psr, ®s->psr); - env->psr = (psr & PSR_ICC) | (env->psr & ~PSR_ICC); + cpu_put_psr_icc(env, psr); #endif /* Note that pc and npc are handled in the caller. */ diff --git a/linux-user/sparc/target_cpu.h b/linux-user/sparc/target_cpu.h index 1f4bed50f4..5f62c5eb75 100644 --- a/linux-user/sparc/target_cpu.h +++ b/linux-user/sparc/target_cpu.h @@ -26,6 +26,17 @@ # define TARGET_STACK_BIAS 0 #endif +static void set_syscall_C(CPUSPARCState *env, bool val) +{ +#ifndef TARGET_SPARC64 + env->icc_C = val; +#elif defined(TARGET_ABI32) + env->icc_C = (uint64_t)val << 32; +#else + env->xcc_C = val; +#endif +} + static inline void cpu_clone_regs_child(CPUSPARCState *env, target_ulong newsp, unsigned flags) { @@ -58,11 +69,7 @@ static inline void cpu_clone_regs_child(CPUSPARCState *env, target_ulong newsp, * do the pc advance twice. */ env->regwptr[WREG_O0] = 0; -#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) - env->xcc &= ~PSR_CARRY; -#else - env->psr &= ~PSR_CARRY; -#endif + set_syscall_C(env, 0); env->pc = env->npc; env->npc = env->npc + 4; } diff --git a/meson.build b/meson.build index dcef8b1e79..51a51075db 100644 --- a/meson.build +++ b/meson.build @@ -1323,6 +1323,30 @@ if not get_option('glusterfs').auto() or have_block endif endif +hv_balloon = false +if get_option('hv_balloon').allowed() and have_system + if cc.links(''' + #include <string.h> + #include <gmodule.h> + int main(void) { + GTree *tree; + + tree = g_tree_new((GCompareFunc)strcmp); + (void)g_tree_node_first(tree); + g_tree_destroy(tree); + return 0; + } + ''', dependencies: glib) + hv_balloon = true + else + if get_option('hv_balloon').enabled() + error('could not enable hv-balloon, update your glib') + else + warning('could not find glib support for hv-balloon, disabling') + endif + endif +endif + libssh = not_found if not get_option('libssh').auto() or have_block libssh = dependency('libssh', version: '>=0.8.7', @@ -2855,7 +2879,8 @@ host_kconfig = \ (targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \ (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \ (multiprocess_allowed ? 
['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \ - (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + \ + (hv_balloon ? ['CONFIG_HV_BALLOON_POSSIBLE=y'] : []) ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ] @@ -4321,6 +4346,7 @@ if targetos == 'windows' endif summary_info += {'seccomp support': seccomp} summary_info += {'GlusterFS support': glusterfs} +summary_info += {'hv-balloon support': hv_balloon} summary_info += {'TPM support': have_tpm} summary_info += {'libssh support': libssh} summary_info += {'lzo support': lzo} diff --git a/meson_options.txt b/meson_options.txt index 3c7398f3c6..5c212fcd45 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -150,6 +150,8 @@ option('gio', type : 'feature', value : 'auto', description: 'use libgio for D-Bus support') option('glusterfs', type : 'feature', value : 'auto', description: 'Glusterfs block device driver') +option('hv_balloon', type : 'feature', value : 'auto', + description: 'hv-balloon driver (requires Glib 2.68+ GTree API)') option('libdw', type : 'feature', value : 'auto', description: 'debuginfo support') option('libiscsi', type : 'feature', value : 'auto', diff --git a/migration/ram.c b/migration/ram.c index a0f3b86663..8c7886ab79 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3030,71 +3030,71 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which * guarantees that we'll at least released it in a regular basis. */ - qemu_mutex_lock(&rs->bitmap_mutex); - WITH_RCU_READ_LOCK_GUARD() { - if (ram_list.version != rs->last_version) { - ram_state_reset(rs); - } + WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) { + WITH_RCU_READ_LOCK_GUARD() { + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); + } - /* Read version before ram_list.blocks */ - smp_rmb(); + /* Read version before ram_list.blocks */ + smp_rmb(); - ret = rdma_registration_start(f, RAM_CONTROL_ROUND); - if (ret < 0) { - qemu_file_set_error(f, ret); - goto out; - } + ret = rdma_registration_start(f, RAM_CONTROL_ROUND); + if (ret < 0) { + qemu_file_set_error(f, ret); + goto out; + } - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = migration_rate_exceeded(f)) == 0 || - postcopy_has_request(rs)) { - int pages; + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = migration_rate_exceeded(f)) == 0 || + postcopy_has_request(rs)) { + int pages; - if (qemu_file_get_error(f)) { - break; - } + if (qemu_file_get_error(f)) { + break; + } - pages = ram_find_and_save_block(rs); - /* no more pages to sent */ - if (pages == 0) { - done = 1; - break; - } + pages = ram_find_and_save_block(rs); + /* no more pages to sent */ + if (pages == 0) { + done = 1; + break; + } - if (pages < 0) { - qemu_file_set_error(f, pages); - break; - } + if (pages < 0) { + qemu_file_set_error(f, pages); + break; + } - rs->target_page_count += pages; + rs->target_page_count += pages; - /* - * During postcopy, it is necessary to make sure one whole host - * page is sent in one chunk. - */ - if (migrate_postcopy_ram()) { - compress_flush_data(); - } + /* + * During postcopy, it is necessary to make sure one whole host + * page is sent in one chunk. + */ + if (migrate_postcopy_ram()) { + compress_flush_data(); + } - /* - * we want to check in the 1st loop, just in case it was the 1st - * time and we had to sync the dirty bitmap. 
- * qemu_clock_get_ns() is a bit expensive, so we only check each - * some iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / - 1000000; - if (t1 > MAX_WAIT) { - trace_ram_save_iterate_big_wait(t1, i); - break; + /* + * we want to check in the 1st loop, just in case it was the 1st + * time and we had to sync the dirty bitmap. + * qemu_clock_get_ns() is a bit expensive, so we only check each + * some iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / + 1000000; + if (t1 > MAX_WAIT) { + trace_ram_save_iterate_big_wait(t1, i); + break; + } } + i++; } - i++; } } - qemu_mutex_unlock(&rs->bitmap_mutex); /* * Must occur before EOS (or any QEMUFile operation) diff --git a/monitor/monitor.c b/monitor/monitor.c index 941f87815a..01ede1babd 100644 --- a/monitor/monitor.c +++ b/monitor/monitor.c @@ -315,6 +315,7 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { [QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS }, [QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS }, [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_HV_BALLOON_STATUS_REPORT] = { 1000 * SCALE_MS }, }; /* diff --git a/qapi/dump.json b/qapi/dump.json index 4ae1f722a9..5cbc237ad9 100644 --- a/qapi/dump.json +++ b/qapi/dump.json @@ -15,11 +15,23 @@ # # @elf: elf format # -# @kdump-zlib: kdump-compressed format with zlib-compressed +# @kdump-zlib: makedumpfile flattened, kdump-compressed format with zlib +# compression # -# @kdump-lzo: kdump-compressed format with lzo-compressed +# @kdump-lzo: makedumpfile flattened, kdump-compressed format with lzo +# compression # -# @kdump-snappy: kdump-compressed format with snappy-compressed +# @kdump-snappy: makedumpfile flattened, kdump-compressed format with snappy +# compression +# +# @kdump-raw-zlib: raw assembled kdump-compressed format with zlib compression +# (since 8.2) +# +# @kdump-raw-lzo: raw assembled kdump-compressed format with lzo compression +# (since 8.2) +# +# @kdump-raw-snappy: raw assembled kdump-compressed format with snappy +# compression (since 8.2) # # @win-dmp: Windows full crashdump format, can be used instead of ELF # converting (since 2.13) @@ -27,7 +39,11 @@ # Since: 2.0 ## { 'enum': 'DumpGuestMemoryFormat', - 'data': [ 'elf', 'kdump-zlib', 'kdump-lzo', 'kdump-snappy', 'win-dmp' ] } + 'data': [ + 'elf', + 'kdump-zlib', 'kdump-lzo', 'kdump-snappy', + 'kdump-raw-zlib', 'kdump-raw-lzo', 'kdump-raw-snappy', + 'win-dmp' ] } ## # @dump-guest-memory: diff --git a/qapi/machine-target.json b/qapi/machine-target.json index 4e55adbe00..c8d7d9868d 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -230,7 +230,8 @@ 'data': { 'model': 'CpuModelInfo' }, 'if': { 'any': [ 'TARGET_S390X', 'TARGET_I386', - 'TARGET_ARM' ] } } + 'TARGET_ARM', + 'TARGET_LOONGARCH64' ] } } ## # @query-cpu-model-expansion: @@ -275,7 +276,8 @@ 'returns': 'CpuModelExpansionInfo', 'if': { 'any': [ 'TARGET_S390X', 'TARGET_I386', - 'TARGET_ARM' ] } } + 'TARGET_ARM', + 'TARGET_LOONGARCH64' ] } } ## # @CpuDefinitionInfo: diff --git a/qapi/machine.json b/qapi/machine.json index 6c9d2f6dcf..b6d634b30d 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1138,6 +1138,68 @@ 'data': { 'actual': 'int' } } ## +# @HvBalloonInfo: +# +# hv-balloon guest-provided memory status information. +# +# @committed: the amount of memory in use inside the guest plus the +# amount of the memory unusable inside the guest (ballooned out, +# offline, etc.) 
+# +# @available: the amount of the memory inside the guest available for +# new allocations ("free") +# +# Since: 8.2 +## +{ 'struct': 'HvBalloonInfo', + 'data': { 'committed': 'size', 'available': 'size' } } + +## +# @query-hv-balloon-status-report: +# +# Returns the hv-balloon driver data contained in the last received "STATUS" +# message from the guest. +# +# Returns: +# - @HvBalloonInfo on success +# - If no hv-balloon device is present, guest memory status reporting +# is not enabled or no guest memory status report received yet, +# GenericError +# +# Since: 8.2 +# +# Example: +# +# -> { "execute": "query-hv-balloon-status-report" } +# <- { "return": { +# "committed": 816640000, +# "available": 3333054464 +# } +# } +## +{ 'command': 'query-hv-balloon-status-report', 'returns': 'HvBalloonInfo' } + +## +# @HV_BALLOON_STATUS_REPORT: +# +# Emitted when the hv-balloon driver receives a "STATUS" message from +# the guest. +# +# Note: this event is rate-limited. +# +# Since: 8.2 +# +# Example: +# +# <- { "event": "HV_BALLOON_STATUS_REPORT", +# "data": { "committed": 816640000, "available": 3333054464 }, +# "timestamp": { "seconds": 1600295492, "microseconds": 661044 } } +# +## +{ 'event': 'HV_BALLOON_STATUS_REPORT', + 'data': 'HvBalloonInfo' } + +## # @MemoryInfo: # # Actual memory information in bytes. @@ -1290,6 +1352,29 @@ } ## +# @HvBalloonDeviceInfo: +# +# hv-balloon provided memory state information +# +# @id: device's ID +# +# @memaddr: physical address in memory, where device is mapped +# +# @max-size: the maximum size of memory that the device can provide +# +# @memdev: memory backend linked with device +# +# Since: 8.2 +## +{ 'struct': 'HvBalloonDeviceInfo', + 'data': { '*id': 'str', + '*memaddr': 'size', + 'max-size': 'size', + '*memdev': 'str' + } +} + +## # @MemoryDeviceInfoKind: # # @nvdimm: since 2.12 @@ -1300,10 +1385,13 @@ # # @sgx-epc: since 6.2. # +# @hv-balloon: since 8.2. 
+# # Since: 2.1 ## { 'enum': 'MemoryDeviceInfoKind', - 'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc' ] } + 'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc', + 'hv-balloon' ] } ## # @PCDIMMDeviceInfoWrapper: @@ -1338,6 +1426,14 @@ 'data': { 'data': 'SgxEPCDeviceInfo' } } ## +# @HvBalloonDeviceInfoWrapper: +# +# Since: 8.2 +## +{ 'struct': 'HvBalloonDeviceInfoWrapper', + 'data': { 'data': 'HvBalloonDeviceInfo' } } + +## # @MemoryDeviceInfo: # # Union containing information about a memory device @@ -1351,7 +1447,8 @@ 'nvdimm': 'PCDIMMDeviceInfoWrapper', 'virtio-pmem': 'VirtioPMEMDeviceInfoWrapper', 'virtio-mem': 'VirtioMEMDeviceInfoWrapper', - 'sgx-epc': 'SgxEPCDeviceInfoWrapper' + 'sgx-epc': 'SgxEPCDeviceInfoWrapper', + 'hv-balloon': 'HvBalloonDeviceInfoWrapper' } } diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 7ca4b77eae..e9d6d39279 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -123,6 +123,7 @@ meson_options_help() { printf "%s\n" ' gtk-clipboard clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)' printf "%s\n" ' guest-agent Build QEMU Guest Agent' printf "%s\n" ' guest-agent-msi Build MSI package for the QEMU Guest Agent' + printf "%s\n" ' hv-balloon hv-balloon driver (requires Glib 2.68+ GTree API)' printf "%s\n" ' hvf HVF acceleration support' printf "%s\n" ' iconv Font glyph conversion support' printf "%s\n" ' jack JACK sound support' @@ -333,6 +334,8 @@ _meson_option_parse() { --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;; --enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;; --disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;; + --enable-hv-balloon) printf "%s" -Dhv_balloon=enabled ;; + --disable-hv-balloon) printf "%s" -Dhv_balloon=disabled ;; --enable-hvf) printf "%s" -Dhvf=enabled ;; --disable-hvf) printf "%s" -Dhvf=disabled ;; --iasl=*) quote_sh "-Diasl=$2" ;; diff --git a/system/dirtylimit.c b/system/dirtylimit.c index fa959d7743..495c7a7082 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -411,12 +411,20 @@ void dirtylimit_set_all(uint64_t quota, void dirtylimit_vcpu_execute(CPUState *cpu) { - if (dirtylimit_in_service() && - dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && - cpu->throttle_us_per_full) { - trace_dirtylimit_vcpu_execute(cpu->cpu_index, - cpu->throttle_us_per_full); - usleep(cpu->throttle_us_per_full); + if (cpu->throttle_us_per_full) { + dirtylimit_state_lock(); + + if (dirtylimit_in_service() && + dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { + dirtylimit_state_unlock(); + trace_dirtylimit_vcpu_execute(cpu->cpu_index, + cpu->throttle_us_per_full); + + g_usleep(cpu->throttle_us_per_full); + return; + } + + dirtylimit_state_unlock(); } } @@ -644,10 +652,6 @@ static struct DirtyLimitInfoList *dirtylimit_query_all(void) struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) { - if (!dirtylimit_in_service()) { - return NULL; - } - return dirtylimit_query_all(); } diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 76348f9d5d..75b2c557b9 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -267,7 +267,6 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, fi.submap |= 1 << XENFEAT_writable_page_tables | 1 << XENFEAT_writable_descriptor_tables | 1 << XENFEAT_auto_translated_physmap | - 1 << XENFEAT_supervisor_mode_kernel | 1 << XENFEAT_hvm_callback_vector | 1 << XENFEAT_hvm_safe_pvclock 
| 1 << XENFEAT_hvm_pirqs; @@ -307,7 +306,7 @@ static int kvm_xen_set_vcpu_callback_vector(CPUState *cs) trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector); - return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva); + return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva); } static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data) @@ -425,6 +424,13 @@ void kvm_xen_set_callback_asserted(void) } } +bool kvm_xen_has_vcpu_callback_vector(void) +{ + CPUState *cs = qemu_get_cpu(0); + + return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector; +} + void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) { CPUState *cs = qemu_get_cpu(vcpu_id); @@ -441,7 +447,8 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) * deliver it as an MSI. */ MSIMessage msg = { - .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id, + .address = APIC_DEFAULT_ADDRESS | + (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT), .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT), }; kvm_irqchip_send_msi(kvm_state, msg); @@ -850,8 +857,7 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu, int ret = -ENOSYS; switch (cmd) { case HVMOP_set_evtchn_upcall_vector: - ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, - exit->u.hcall.params[0]); + ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg); break; case HVMOP_pagetable_dying: diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h index 1265dc7cb5..cfe195db4e 100644 --- a/target/loongarch/cpu-param.h +++ b/target/loongarch/cpu-param.h @@ -12,6 +12,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 48 #define TARGET_VIRT_ADDR_SPACE_BITS 48 -#define TARGET_PAGE_BITS 14 +#define TARGET_PAGE_BITS 12 #endif diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index ef1bf89dac..a60d07acd5 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -443,6 +443,7 @@ static void loongarch_la464_initfn(Object *obj) env->cpucfg[20] = data; env->CSR_ASID = FIELD_DP64(0, CSR_ASID, ASIDBITS, 0xa); + loongarch_cpu_post_init(obj); } static void loongarch_la132_initfn(Object *obj) @@ -474,6 +475,12 @@ static void loongarch_la132_initfn(Object *obj) env->cpucfg[1] = data; } +static void loongarch_max_initfn(Object *obj) +{ + /* '-cpu max' for TCG: we use cpu la464. 
*/ + loongarch_la464_initfn(obj); +} + static void loongarch_cpu_list_entry(gpointer data, gpointer user_data) { const char *typename = object_class_get_name(OBJECT_CLASS(data)); @@ -616,6 +623,72 @@ static const MemoryRegionOps loongarch_qemu_ops = { }; #endif +static bool loongarch_get_lsx(Object *obj, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + bool ret; + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + ret = true; + } else { + ret = false; + } + return ret; +} + +static void loongarch_set_lsx(Object *obj, bool value, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + if (value) { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LSX, 1); + } else { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LSX, 0); + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LASX, 0); + } +} + +static bool loongarch_get_lasx(Object *obj, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + bool ret; + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + ret = true; + } else { + ret = false; + } + return ret; +} + +static void loongarch_set_lasx(Object *obj, bool value, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + if (value) { + if (!FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LSX, 1); + } + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LASX, 1); + } else { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LASX, 0); + } +} + +void loongarch_cpu_post_init(Object *obj) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + object_property_add_bool(obj, "lsx", loongarch_get_lsx, + loongarch_set_lsx); + } + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); + } +} + static void loongarch_cpu_init(Object *obj) { #ifndef CONFIG_USER_ONLY @@ -829,6 +902,7 @@ static const TypeInfo loongarch_cpu_type_infos[] = { }, DEFINE_LOONGARCH_CPU_TYPE(64, "la464", loongarch_la464_initfn), DEFINE_LOONGARCH_CPU_TYPE(32, "la132", loongarch_la132_initfn), + DEFINE_LOONGARCH_CPU_TYPE(64, "max", loongarch_max_initfn), }; DEFINE_TYPES(loongarch_cpu_type_infos) diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index 8b54cf109c..9d0f79f814 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -486,4 +486,6 @@ void loongarch_cpu_list(void); #define LOONGARCH_CPU_TYPE_NAME(model) model LOONGARCH_CPU_TYPE_SUFFIX #define CPU_RESOLVING_TYPE TYPE_LOONGARCH_CPU +void loongarch_cpu_post_init(Object *obj); + #endif /* LOONGARCH_CPU_H */ diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc index 98f856bb29..92b1d22e28 100644 --- a/target/loongarch/insn_trans/trans_vec.c.inc +++ b/target/loongarch/insn_trans/trans_vec.c.inc @@ -4,8 +4,6 @@ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited */ -#ifndef CONFIG_USER_ONLY - static bool check_vec(DisasContext *ctx, uint32_t oprsz) { if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) { @@ -21,15 +19,6 @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz) return true; } -#else - -static bool check_vec(DisasContext *ctx, uint32_t oprsz) -{ - return true; -} - -#endif - static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, gen_helper_gvec_4_ptr *fn) { diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c 
index 6c25957881..645672ff59 100644 --- a/target/loongarch/loongarch-qmp-cmds.c +++ b/target/loongarch/loongarch-qmp-cmds.c @@ -7,8 +7,13 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "qapi/qapi-commands-machine-target.h" #include "cpu.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qom/qom-qobject.h" static void loongarch_cpu_add_definition(gpointer data, gpointer user_data) { @@ -35,3 +40,62 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) return cpu_list; } + +static const char *cpu_model_advertised_features[] = { + "lsx", "lasx", NULL +}; + +CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + CpuModelExpansionInfo *expansion_info; + QDict *qdict_out; + ObjectClass *oc; + Object *obj; + const char *name; + int i; + + if (type != CPU_MODEL_EXPANSION_TYPE_STATIC) { + error_setg(errp, "The requested expansion type is not supported"); + return NULL; + } + + oc = cpu_class_by_name(TYPE_LOONGARCH_CPU, model->name); + if (!oc) { + error_setg(errp, "The CPU type '%s' is not a recognized LoongArch CPU type", + model->name); + return NULL; + } + + obj = object_new(object_class_get_name(oc)); + + expansion_info = g_new0(CpuModelExpansionInfo, 1); + expansion_info->model = g_malloc0(sizeof(*expansion_info->model)); + expansion_info->model->name = g_strdup(model->name); + + qdict_out = qdict_new(); + + i = 0; + while ((name = cpu_model_advertised_features[i++]) != NULL) { + ObjectProperty *prop = object_property_find(obj, name); + if (prop) { + QObject *value; + + assert(prop->get); + value = object_property_get_qobject(obj, name, &error_abort); + + qdict_put_obj(qdict_out, name, value); + } + } + + if (!qdict_size(qdict_out)) { + qobject_unref(qdict_out); + } else { + expansion_info->model->props = QOBJECT(qdict_out); + } + + object_unref(obj); + + return expansion_info; +} diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c index c8b8b0497f..449043c68b 100644 --- a/target/loongarch/tlb_helper.c +++ b/target/loongarch/tlb_helper.c @@ -60,6 +60,9 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, tlb_rplv = 0; } + /* Remove sw bit between bit12 -- bit PS*/ + tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); + /* Check access rights */ if (!tlb_v) { return TLBRET_INVALID; @@ -82,10 +85,6 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, return TLBRET_DIRTY; } - /* - * tlb_entry contains ppn[47:12] while 16KiB ppn is [47:15] - * need adjust. 
- */ *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | (address & MAKE_64BIT_MASK(0, tlb_ps)); *prot = PAGE_READ; @@ -774,7 +773,7 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, /* Move Global bit */ tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | - (tmp0 & (~(1 << R_TLBENTRY_G_SHIFT))); + (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); ps = ptbase + ptwidth - 1; if (odd) { tmp0 += MAKE_64BIT_MASK(ps, 1); diff --git a/target/sparc/cc_helper.c b/target/sparc/cc_helper.c deleted file mode 100644 index 7ad5b9b29e..0000000000 --- a/target/sparc/cc_helper.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Helpers for lazy condition code handling - * - * Copyright (c) 2003-2005 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/helper-proto.h" - -static uint32_t compute_all_flags(CPUSPARCState *env) -{ - return env->psr & PSR_ICC; -} - -static uint32_t compute_C_flags(CPUSPARCState *env) -{ - return env->psr & PSR_CARRY; -} - -static inline uint32_t get_NZ_icc(int32_t dst) -{ - uint32_t ret = 0; - - if (dst == 0) { - ret = PSR_ZERO; - } else if (dst < 0) { - ret = PSR_NEG; - } - return ret; -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_flags_xcc(CPUSPARCState *env) -{ - return env->xcc & PSR_ICC; -} - -static uint32_t compute_C_flags_xcc(CPUSPARCState *env) -{ - return env->xcc & PSR_CARRY; -} - -static inline uint32_t get_NZ_xcc(target_long dst) -{ - uint32_t ret = 0; - - if (!dst) { - ret = PSR_ZERO; - } else if (dst < 0) { - ret = PSR_NEG; - } - return ret; -} -#endif - -static inline uint32_t get_V_div_icc(target_ulong src2) -{ - uint32_t ret = 0; - - if (src2 != 0) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_div(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_V_div_icc(CC_SRC2); - return ret; -} - -static uint32_t compute_C_div(CPUSPARCState *env) -{ - return 0; -} - -static inline uint32_t get_C_add_icc(uint32_t dst, uint32_t src1) -{ - uint32_t ret = 0; - - if (dst < src1) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_addx_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((src1 & src2) | (~dst & (src1 | src2))) & (1U << 31)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_add_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1U << 31)) { - ret = PSR_OVF; - } - return ret; -} - -#ifdef TARGET_SPARC64 -static inline uint32_t get_C_add_xcc(target_ulong dst, target_ulong src1) -{ - uint32_t ret = 0; - - if (dst < src1) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_addx_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((src1 
& src2) | (~dst & (src1 | src2))) & (1ULL << 63)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_add_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 63)) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_add_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - ret |= get_C_add_xcc(CC_DST, CC_SRC); - ret |= get_V_add_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_add_xcc(CPUSPARCState *env) -{ - return get_C_add_xcc(CC_DST, CC_SRC); -} -#endif - -static uint32_t compute_all_add(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_add_icc(CC_DST, CC_SRC); - ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_add(CPUSPARCState *env) -{ - return get_C_add_icc(CC_DST, CC_SRC); -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_addx_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - ret |= get_C_addx_xcc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_add_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_addx_xcc(CPUSPARCState *env) -{ - return get_C_addx_xcc(CC_DST, CC_SRC, CC_SRC2); -} -#endif - -static uint32_t compute_all_addx(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_addx_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_addx(CPUSPARCState *env) -{ - return get_C_addx_icc(CC_DST, CC_SRC, CC_SRC2); -} - -static inline uint32_t get_V_tag_icc(target_ulong src1, target_ulong src2) -{ - uint32_t ret = 0; - - if ((src1 | src2) & 0x3) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_tadd(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_add_icc(CC_DST, CC_SRC); - ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_tag_icc(CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_all_taddtv(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_add_icc(CC_DST, CC_SRC); - return ret; -} - -static inline uint32_t get_C_sub_icc(uint32_t src1, uint32_t src2) -{ - uint32_t ret = 0; - - if (src1 < src2) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_subx_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((~src1 & src2) | (dst & (~src1 | src2))) & (1U << 31)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_sub_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2) & (src1 ^ dst)) & (1U << 31)) { - ret = PSR_OVF; - } - return ret; -} - - -#ifdef TARGET_SPARC64 -static inline uint32_t get_C_sub_xcc(target_ulong src1, target_ulong src2) -{ - uint32_t ret = 0; - - if (src1 < src2) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_subx_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((~src1 & src2) | (dst & (~src1 | src2))) & (1ULL << 63)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_sub_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 63)) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_sub_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - 
ret |= get_C_sub_xcc(CC_SRC, CC_SRC2); - ret |= get_V_sub_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_sub_xcc(CPUSPARCState *env) -{ - return get_C_sub_xcc(CC_SRC, CC_SRC2); -} -#endif - -static uint32_t compute_all_sub(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_sub_icc(CC_SRC, CC_SRC2); - ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_sub(CPUSPARCState *env) -{ - return get_C_sub_icc(CC_SRC, CC_SRC2); -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_subx_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - ret |= get_C_subx_xcc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_sub_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_subx_xcc(CPUSPARCState *env) -{ - return get_C_subx_xcc(CC_DST, CC_SRC, CC_SRC2); -} -#endif - -static uint32_t compute_all_subx(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_subx_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_subx(CPUSPARCState *env) -{ - return get_C_subx_icc(CC_DST, CC_SRC, CC_SRC2); -} - -static uint32_t compute_all_tsub(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_sub_icc(CC_SRC, CC_SRC2); - ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_tag_icc(CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_all_tsubtv(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_sub_icc(CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_all_logic(CPUSPARCState *env) -{ - return get_NZ_icc(CC_DST); -} - -static uint32_t compute_C_logic(CPUSPARCState *env) -{ - return 0; -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_logic_xcc(CPUSPARCState *env) -{ - return get_NZ_xcc(CC_DST); -} -#endif - -typedef struct CCTable { - uint32_t (*compute_all)(CPUSPARCState *env); /* return all the flags */ - uint32_t (*compute_c)(CPUSPARCState *env); /* return the C flag */ -} CCTable; - -static const CCTable icc_table[CC_OP_NB] = { - /* CC_OP_DYNAMIC should never happen */ - [CC_OP_FLAGS] = { compute_all_flags, compute_C_flags }, - [CC_OP_DIV] = { compute_all_div, compute_C_div }, - [CC_OP_ADD] = { compute_all_add, compute_C_add }, - [CC_OP_ADDX] = { compute_all_addx, compute_C_addx }, - [CC_OP_TADD] = { compute_all_tadd, compute_C_add }, - [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_add }, - [CC_OP_SUB] = { compute_all_sub, compute_C_sub }, - [CC_OP_SUBX] = { compute_all_subx, compute_C_subx }, - [CC_OP_TSUB] = { compute_all_tsub, compute_C_sub }, - [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_sub }, - [CC_OP_LOGIC] = { compute_all_logic, compute_C_logic }, -}; - -#ifdef TARGET_SPARC64 -static const CCTable xcc_table[CC_OP_NB] = { - /* CC_OP_DYNAMIC should never happen */ - [CC_OP_FLAGS] = { compute_all_flags_xcc, compute_C_flags_xcc }, - [CC_OP_DIV] = { compute_all_logic_xcc, compute_C_logic }, - [CC_OP_ADD] = { compute_all_add_xcc, compute_C_add_xcc }, - [CC_OP_ADDX] = { compute_all_addx_xcc, compute_C_addx_xcc }, - [CC_OP_TADD] = { compute_all_add_xcc, compute_C_add_xcc }, - [CC_OP_TADDTV] = { compute_all_add_xcc, compute_C_add_xcc }, - [CC_OP_SUB] = { compute_all_sub_xcc, compute_C_sub_xcc }, - [CC_OP_SUBX] = { compute_all_subx_xcc, compute_C_subx_xcc }, - [CC_OP_TSUB] = { compute_all_sub_xcc, compute_C_sub_xcc }, - [CC_OP_TSUBTV] = { compute_all_sub_xcc, compute_C_sub_xcc }, 
- [CC_OP_LOGIC] = { compute_all_logic_xcc, compute_C_logic }, -}; -#endif - -void helper_compute_psr(CPUSPARCState *env) -{ - uint32_t new_psr; - - new_psr = icc_table[CC_OP].compute_all(env); - env->psr = new_psr; -#ifdef TARGET_SPARC64 - new_psr = xcc_table[CC_OP].compute_all(env); - env->xcc = new_psr; -#endif - CC_OP = CC_OP_FLAGS; -} - -uint32_t helper_compute_C_icc(CPUSPARCState *env) -{ - return icc_table[CC_OP].compute_c(env) >> PSR_CARRY_SHIFT; -} diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index bb1a155510..befa7fc4eb 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -46,7 +46,6 @@ static void sparc_cpu_reset_hold(Object *obj) env->wim = 1; #endif env->regwptr = env->regbase + (env->cwp * 16); - CC_OP = CC_OP_FLAGS; #if defined(CONFIG_USER_ONLY) #ifdef TARGET_SPARC64 env->cleanwin = env->nwindows - 2; diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 758a4e8aaa..3e361a5b75 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -137,32 +137,6 @@ enum { #define PSR_CWP 0x1f #endif -#define CC_SRC (env->cc_src) -#define CC_SRC2 (env->cc_src2) -#define CC_DST (env->cc_dst) -#define CC_OP (env->cc_op) - -/* Even though lazy evaluation of CPU condition codes tends to be less - * important on RISC systems where condition codes are only updated - * when explicitly requested, SPARC uses it to update 32-bit and 64-bit - * condition codes. - */ -enum { - CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ - CC_OP_FLAGS, /* all cc are back in status register */ - CC_OP_DIV, /* modify N, Z and V, C = 0*/ - CC_OP_ADD, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_ADDX, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TADD, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TADDTV, /* modify all flags except V, CC_DST = res, CC_SRC = src1 */ - CC_OP_SUB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_SUBX, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TSUB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TSUBTV, /* modify all flags except V, CC_DST = res, CC_SRC = src1 */ - CC_OP_LOGIC, /* modify N and Z, C = V = 0, CC_DST = res */ - CC_OP_NB, -}; - /* Trap base register */ #define TBR_BASE_MASK 0xfffff000 @@ -458,15 +432,35 @@ struct CPUArchState { target_ulong npc; /* next program counter */ target_ulong y; /* multiply/divide register */ - /* emulator internal flags handling */ - target_ulong cc_src, cc_src2; - target_ulong cc_dst; - uint32_t cc_op; + /* + * Bit 31 is for icc, bit 63 for xcc. + * Other bits are garbage. + */ + target_long cc_N; + target_long cc_V; + + /* + * Z is represented as == 0; any non-zero value is !Z. + * For sparc64, the high 32-bits of icc.Z are garbage. + */ + target_ulong icc_Z; +#ifdef TARGET_SPARC64 + target_ulong xcc_Z; +#endif + + /* + * For sparc32, icc.C is boolean. + * For sparc64, xcc.C is boolean; + * icc.C is bit 32 with other bits garbage. 
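As an aside on the flag layout documented in the surrounding cpu.h hunk, the four PSR.icc bits can be rebuilt from the split fields as sketched below. This is a minimal standalone sketch, not code from this series: icc_from_split_flags() is a hypothetical name, the PSR_* bit positions are taken from target/sparc/cpu.h, and the sparc64 form of icc.C (bit 32) is assumed.

    #include <stdint.h>

    #define PSR_NEG   (1u << 23)   /* PSR.icc bit positions, per target/sparc/cpu.h */
    #define PSR_ZERO  (1u << 22)
    #define PSR_OVF   (1u << 21)
    #define PSR_CARRY (1u << 20)

    /* Hypothetical helper: rebuild PSR.icc from the split fields (sparc64 case). */
    static uint32_t icc_from_split_flags(int64_t cc_N, int64_t cc_V,
                                         uint64_t icc_Z, uint64_t icc_C)
    {
        uint32_t icc = 0;

        if (cc_N & (1ull << 31)) {      /* icc.N lives in bit 31 of cc_N */
            icc |= PSR_NEG;
        }
        if ((uint32_t)icc_Z == 0) {     /* Z is represented as "value == 0" */
            icc |= PSR_ZERO;
        }
        if (cc_V & (1ull << 31)) {      /* icc.V lives in bit 31 of cc_V */
            icc |= PSR_OVF;
        }
        if (icc_C & (1ull << 32)) {     /* on sparc64, icc.C is bit 32 of icc_C */
            icc |= PSR_CARRY;
        }
        return icc;
    }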
+ */ + target_ulong icc_C; +#ifdef TARGET_SPARC64 + target_ulong xcc_C; +#endif target_ulong cond; /* conditional branch result (XXX: save it in a temporary register when possible) */ - uint32_t psr; /* processor state register */ target_ulong fsr; /* FPU state register */ CPU_DoubleU fpr[TARGET_DPREGS]; /* floating point registers */ uint32_t cwp; /* index of current register window (extracted @@ -522,7 +516,6 @@ struct CPUArchState { #define MAXTL_MAX 8 #define MAXTL_MASK (MAXTL_MAX - 1) trap_state ts[MAXTL_MAX]; - uint32_t xcc; /* Extended integer condition codes */ uint32_t asi; uint32_t pstate; uint32_t tl; @@ -619,6 +612,7 @@ void sparc_restore_state_to_opc(CPUState *cs, /* win_helper.c */ target_ulong cpu_get_psr(CPUSPARCState *env1); void cpu_put_psr(CPUSPARCState *env1, target_ulong val); +void cpu_put_psr_icc(CPUSPARCState *env1, target_ulong val); void cpu_put_psr_raw(CPUSPARCState *env1, target_ulong val); #ifdef TARGET_SPARC64 void cpu_change_pstate(CPUSPARCState *env1, uint32_t new_pstate); diff --git a/target/sparc/helper.c b/target/sparc/helper.c index 2bcdc81d54..bd10b60e4b 100644 --- a/target/sparc/helper.c +++ b/target/sparc/helper.c @@ -81,109 +81,58 @@ void helper_tick_set_limit(void *opaque, uint64_t limit) } #endif -static target_ulong do_udiv(CPUSPARCState *env, target_ulong a, - target_ulong b, int cc, uintptr_t ra) +uint64_t helper_udiv(CPUSPARCState *env, target_ulong a, target_ulong b) { - int overflow = 0; - uint64_t x0; - uint32_t x1; + uint64_t a64 = (uint32_t)a | ((uint64_t)env->y << 32); + uint32_t b32 = b; + uint32_t r; - x0 = (a & 0xffffffff) | ((int64_t) (env->y) << 32); - x1 = (b & 0xffffffff); - - if (x1 == 0) { - cpu_raise_exception_ra(env, TT_DIV_ZERO, ra); - } - - x0 = x0 / x1; - if (x0 > UINT32_MAX) { - x0 = UINT32_MAX; - overflow = 1; + if (b32 == 0) { + cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); } - if (cc) { - env->cc_src2 = overflow; + a64 /= b32; + r = a64; + if (unlikely(a64 > UINT32_MAX)) { + return -1; /* r = UINT32_MAX, v = 1 */ } - return x0; + return r; } -target_ulong helper_udiv(CPUSPARCState *env, target_ulong a, target_ulong b) +uint64_t helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b) { - return do_udiv(env, a, b, 0, GETPC()); -} + int64_t a64 = (uint32_t)a | ((uint64_t)env->y << 32); + int32_t b32 = b; + int32_t r; -target_ulong helper_udiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b) -{ - return do_udiv(env, a, b, 1, GETPC()); -} - -static target_ulong do_sdiv(CPUSPARCState *env, target_ulong a, - target_ulong b, int cc, uintptr_t ra) -{ - int overflow = 0; - int64_t x0; - int32_t x1; - - x0 = (a & 0xffffffff) | ((int64_t) (env->y) << 32); - x1 = (b & 0xffffffff); - - if (x1 == 0) { - cpu_raise_exception_ra(env, TT_DIV_ZERO, ra); - } else if (x1 == -1 && x0 == INT64_MIN) { - x0 = INT32_MAX; - overflow = 1; - } else { - x0 = x0 / x1; - if ((int32_t) x0 != x0) { - x0 = x0 < 0 ? INT32_MIN : INT32_MAX; - overflow = 1; - } - } - - if (cc) { - env->cc_src2 = overflow; + if (b32 == 0) { + cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); } - return x0; -} - -target_ulong helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b) -{ - return do_sdiv(env, a, b, 0, GETPC()); -} - -target_ulong helper_sdiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b) -{ - return do_sdiv(env, a, b, 1, GETPC()); -} -#ifdef TARGET_SPARC64 -int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b) -{ - if (b == 0) { - /* Raise divide by zero trap. 
*/ - cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); - } else if (b == -1) { - /* Avoid overflow trap with i386 divide insn. */ - return -a; - } else { - return a / b; + if (unlikely(a64 == INT64_MIN)) { + /* + * Special case INT64_MIN / -1 is required to avoid trap on x86 host. + * However, with a dividend of INT64_MIN, there is no 32-bit divisor + * which can yield a 32-bit result: + * INT64_MIN / INT32_MIN = 0x1_0000_0000 + * INT64_MIN / INT32_MAX = -0x1_0000_0002 + * Therefore we know we must overflow and saturate. + */ + return (uint32_t)(b32 < 0 ? INT32_MAX : INT32_MIN) | (-1ull << 32); } -} -uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b) -{ - if (b == 0) { - /* Raise divide by zero trap. */ - cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); + a64 /= b; + r = a64; + if (unlikely(r != a64)) { + return (uint32_t)(a64 < 0 ? INT32_MIN : INT32_MAX) | (-1ull << 32); } - return a / b; + return (uint32_t)r; } -#endif target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, target_ulong src2) { - target_ulong dst; + target_ulong dst, v; /* Tag overflow occurs if either input has bits 0 or 1 set. */ if ((src1 | src2) & 3) { @@ -193,13 +142,23 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, dst = src1 + src2; /* Tag overflow occurs if the addition overflows. */ - if (~(src1 ^ src2) & (src1 ^ dst) & (1u << 31)) { + v = ~(src1 ^ src2) & (src1 ^ dst); + if (v & (1u << 31)) { goto tag_overflow; } /* Only modify the CC after any exceptions have been generated. */ - env->cc_src = src1; - env->cc_src2 = src2; + env->cc_V = v; + env->cc_N = dst; + env->icc_Z = dst; +#ifdef TARGET_SPARC64 + env->xcc_Z = dst; + env->icc_C = dst ^ src1 ^ src2; + env->xcc_C = dst < src1; +#else + env->icc_C = dst < src1; +#endif + return dst; tag_overflow: @@ -209,7 +168,7 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, target_ulong src2) { - target_ulong dst; + target_ulong dst, v; /* Tag overflow occurs if either input has bits 0 or 1 set. */ if ((src1 | src2) & 3) { @@ -219,13 +178,23 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, dst = src1 - src2; /* Tag overflow occurs if the subtraction overflows. */ - if ((src1 ^ src2) & (src1 ^ dst) & (1u << 31)) { + v = (src1 ^ src2) & (src1 ^ dst); + if (v & (1u << 31)) { goto tag_overflow; } /* Only modify the CC after any exceptions have been generated. 
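The tagged-add overflow test introduced in helper_taddcctv above, v = ~(src1 ^ src2) & (src1 ^ dst) checked at bit 31, is the usual signed-overflow identity. A minimal standalone check (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Two large positive tagged words: the sum overflows into the sign bit. */
        uint32_t src1 = 0x40000000, src2 = 0x40000000;
        uint32_t dst = src1 + src2;                     /* 0x80000000 */
        uint32_t v = ~(src1 ^ src2) & (src1 ^ dst);
        assert(v & (1u << 31));                         /* overflow detected */

        /* A small sum does not overflow, so bit 31 of v stays clear. */
        src1 = 1; src2 = 2; dst = src1 + src2;
        v = ~(src1 ^ src2) & (src1 ^ dst);
        assert((v & (1u << 31)) == 0);
        return 0;
    }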
*/ - env->cc_src = src1; - env->cc_src2 = src2; + env->cc_V = v; + env->cc_N = dst; + env->icc_Z = dst; +#ifdef TARGET_SPARC64 + env->xcc_Z = dst; + env->icc_C = dst ^ src1 ^ src2; + env->xcc_C = src1 < src2; +#else + env->icc_C = src1 < src2; +#endif + return dst; tag_overflow: diff --git a/target/sparc/helper.h b/target/sparc/helper.h index dd1721a340..55eff66283 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -27,16 +27,10 @@ DEF_HELPER_FLAGS_2(tick_set_limit, TCG_CALL_NO_RWG, void, ptr, i64) DEF_HELPER_1(debug, void, env) DEF_HELPER_1(save, void, env) DEF_HELPER_1(restore, void, env) -DEF_HELPER_3(udiv, tl, env, tl, tl) -DEF_HELPER_3(udiv_cc, tl, env, tl, tl) -DEF_HELPER_3(sdiv, tl, env, tl, tl) -DEF_HELPER_3(sdiv_cc, tl, env, tl, tl) +DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_WG, i64, env, tl, tl) +DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_WG, i64, env, tl, tl) DEF_HELPER_3(taddcctv, tl, env, tl, tl) DEF_HELPER_3(tsubcctv, tl, env, tl, tl) -#ifdef TARGET_SPARC64 -DEF_HELPER_FLAGS_3(sdivx, TCG_CALL_NO_WG, s64, env, s64, s64) -DEF_HELPER_FLAGS_3(udivx, TCG_CALL_NO_WG, i64, env, i64, i64) -#endif #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32) DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32) @@ -150,5 +144,3 @@ VIS_CMPHELPER(cmpne) #undef F_HELPER_0_1 #undef VIS_HELPER #undef VIS_CMPHELPER -DEF_HELPER_1(compute_psr, void, env) -DEF_HELPER_FLAGS_1(compute_C_icc, TCG_CALL_NO_WG_SE, i32, env) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 0552f1447d..2d26404cb2 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -182,9 +182,10 @@ UMUL 10 ..... 0.1010 ..... . ............. @r_r_ri_cc SMUL 10 ..... 0.1011 ..... . ............. @r_r_ri_cc MULScc 10 ..... 100100 ..... . ............. @r_r_ri_cc1 -UDIVX 10 ..... 001101 ..... . ............. @r_r_ri_cc0 -SDIVX 10 ..... 101101 ..... . ............. @r_r_ri_cc0 -UDIV 10 ..... 0.1110 ..... . ............. @r_r_ri_cc +UDIVX 10 ..... 001101 ..... . ............. @r_r_ri +SDIVX 10 ..... 101101 ..... . ............. @r_r_ri +UDIV 10 ..... 001110 ..... . ............. @r_r_ri +UDIVcc 10 ..... 011110 ..... . ............. @r_r_ri_cc1 SDIV 10 ..... 0.1111 ..... . ............. @r_r_ri_cc TADDcc 10 ..... 100000 ..... . ............. @r_r_ri_cc1 diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 82e8418e46..1563613582 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -103,11 +103,6 @@ void sparc_cpu_do_interrupt(CPUState *cs) CPUSPARCState *env = &cpu->env; int cwp, intno = cs->exception_index; - /* Compute PSR before exposing state. */ - if (env->cc_op != CC_OP_FLAGS) { - cpu_get_psr(env); - } - if (qemu_loglevel_mask(CPU_LOG_INT)) { static int count; const char *name; diff --git a/target/sparc/int64_helper.c b/target/sparc/int64_helper.c index 793e57c536..1b4155f5f3 100644 --- a/target/sparc/int64_helper.c +++ b/target/sparc/int64_helper.c @@ -135,11 +135,6 @@ void sparc_cpu_do_interrupt(CPUState *cs) int intno = cs->exception_index; trap_state *tsptr; - /* Compute PSR before exposing state. 
*/ - if (env->cc_op != CC_OP_FLAGS) { - cpu_get_psr(env); - } - #ifdef DEBUG_PCALL if (qemu_loglevel_mask(CPU_LOG_INT)) { static int count; diff --git a/target/sparc/machine.c b/target/sparc/machine.c index 274e1217df..44dfc07014 100644 --- a/target/sparc/machine.c +++ b/target/sparc/machine.c @@ -83,6 +83,42 @@ static const VMStateInfo vmstate_psr = { .put = put_psr, }; +#ifdef TARGET_SPARC64 +static int get_xcc(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + SPARCCPU *cpu = opaque; + CPUSPARCState *env = &cpu->env; + uint32_t val = qemu_get_be32(f); + + /* Do not clobber icc.[NV] */ + env->cc_N = deposit64(env->cc_N, 32, 32, -(val & PSR_NEG)); + env->cc_V = deposit64(env->cc_V, 32, 32, -(val & PSR_OVF)); + env->xcc_Z = ~val & PSR_ZERO; + env->xcc_C = (val >> PSR_CARRY_SHIFT) & 1; + + return 0; +} + +static int put_xcc(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + SPARCCPU *cpu = opaque; + CPUSPARCState *env = &cpu->env; + uint32_t val = cpu_get_ccr(env); + + /* Extract just xcc out of ccr and shift into legacy position. */ + qemu_put_be32(f, (val & 0xf0) << (20 - 4)); + return 0; +} + +static const VMStateInfo vmstate_xcc = { + .name = "xcc", + .get = get_xcc, + .put = put_xcc, +}; +#endif + static int cpu_pre_save(void *opaque) { SPARCCPU *cpu = opaque; @@ -155,7 +191,14 @@ const VMStateDescription vmstate_sparc_cpu = { VMSTATE_UINT32(env.mmu_version, SPARCCPU), VMSTATE_STRUCT_ARRAY(env.ts, SPARCCPU, MAXTL_MAX, 0, vmstate_trap_state, trap_state), - VMSTATE_UINT32(env.xcc, SPARCCPU), + { + .name = "xcc", + .version_id = 0, + .size = sizeof(uint32_t), + .info = &vmstate_xcc, + .flags = VMS_SINGLE, + .offset = 0, + }, VMSTATE_UINT32(env.asi, SPARCCPU), VMSTATE_UINT32(env.pstate, SPARCCPU), VMSTATE_UINT32(env.tl, SPARCCPU), diff --git a/target/sparc/meson.build b/target/sparc/meson.build index c316773db6..46289c8669 100644 --- a/target/sparc/meson.build +++ b/target/sparc/meson.build @@ -3,7 +3,6 @@ gen = decodetree.process('insns.decode') sparc_ss = ss.source_set() sparc_ss.add(gen) sparc_ss.add(files( - 'cc_helper.c', 'cpu.c', 'fop_helper.c', 'gdbstub.c', diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 986a88c4e1..6fc333a6b8 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -51,12 +51,10 @@ # define gen_helper_restored(E) qemu_build_not_reached() # define gen_helper_retry(E) qemu_build_not_reached() # define gen_helper_saved(E) qemu_build_not_reached() -# define gen_helper_sdivx(D, E, A, B) qemu_build_not_reached() # define gen_helper_set_softint(E, S) qemu_build_not_reached() # define gen_helper_tick_get_count(D, E, T, C) qemu_build_not_reached() # define gen_helper_tick_set_count(P, S) qemu_build_not_reached() # define gen_helper_tick_set_limit(P, S) qemu_build_not_reached() -# define gen_helper_udivx(D, E, A, B) qemu_build_not_reached() # define gen_helper_wrccr(E, S) qemu_build_not_reached() # define gen_helper_wrcwp(E, S) qemu_build_not_reached() # define gen_helper_wrgl(E, S) qemu_build_not_reached() @@ -105,21 +103,35 @@ /* global register indexes */ static TCGv_ptr cpu_regwptr; -static TCGv cpu_cc_src, cpu_cc_src2, cpu_cc_dst; -static TCGv_i32 cpu_cc_op; -static TCGv_i32 cpu_psr; static TCGv cpu_fsr, cpu_pc, cpu_npc; static TCGv cpu_regs[32]; static TCGv cpu_y; static TCGv cpu_tbr; static TCGv cpu_cond; +static TCGv cpu_cc_N; +static TCGv cpu_cc_V; +static TCGv cpu_icc_Z; +static TCGv cpu_icc_C; #ifdef TARGET_SPARC64 -static TCGv_i32 cpu_xcc, cpu_fprs; +static 
TCGv cpu_xcc_Z; +static TCGv cpu_xcc_C; +static TCGv_i32 cpu_fprs; static TCGv cpu_gsr; #else # define cpu_fprs ({ qemu_build_not_reached(); (TCGv)NULL; }) # define cpu_gsr ({ qemu_build_not_reached(); (TCGv)NULL; }) #endif + +#ifdef TARGET_SPARC64 +#define cpu_cc_Z cpu_xcc_Z +#define cpu_cc_C cpu_xcc_C +#else +#define cpu_cc_Z cpu_icc_Z +#define cpu_cc_C cpu_icc_C +#define cpu_xcc_Z ({ qemu_build_not_reached(); NULL; }) +#define cpu_xcc_C ({ qemu_build_not_reached(); NULL; }) +#endif + /* Floating point registers */ static TCGv_i64 cpu_fpr[TARGET_DPREGS]; @@ -132,6 +144,12 @@ static TCGv_i64 cpu_fpr[TARGET_DPREGS]; # define env64_field_offsetof(X) ({ qemu_build_not_reached(); 0; }) #endif +typedef struct DisasCompare { + TCGCond cond; + TCGv c1; + int c2; +} DisasCompare; + typedef struct DisasDelayException { struct DisasDelayException *next; TCGLabel *lab; @@ -145,8 +163,13 @@ typedef struct DisasContext { DisasContextBase base; target_ulong pc; /* current Program Counter: integer or DYNAMIC_PC */ target_ulong npc; /* next PC: integer or DYNAMIC_PC or JUMP_PC */ - target_ulong jump_pc[2]; /* used when JUMP_PC pc value is used */ + + /* Used when JUMP_PC value is used. */ + DisasCompare jump; + target_ulong jump_pc[2]; + int mem_idx; + bool cpu_cond_live; bool fpu_enabled; bool address_mask_32bit; #ifndef CONFIG_USER_ONLY @@ -156,7 +179,6 @@ typedef struct DisasContext { #endif #endif - uint32_t cc_op; /* current CC operation */ sparc_def_t *def; #ifdef TARGET_SPARC64 int fprs_dirty; @@ -165,12 +187,6 @@ typedef struct DisasContext { DisasDelayException *delay_excp_list; } DisasContext; -typedef struct { - TCGCond cond; - bool is_bool; - TCGv c1, c2; -} DisasCompare; - // This function uses non-native bit order #define GET_FIELD(X, FROM, TO) \ ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1)) @@ -366,293 +382,162 @@ static void gen_goto_tb(DisasContext *s, int tb_num, } } -// XXX suboptimal -static void gen_mov_reg_N(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_NEG_SHIFT, 1); -} - -static void gen_mov_reg_Z(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_ZERO_SHIFT, 1); -} - -static void gen_mov_reg_V(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_OVF_SHIFT, 1); -} - -static void gen_mov_reg_C(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_CARRY_SHIFT, 1); -} - -static void gen_op_add_cc(TCGv dst, TCGv src1, TCGv src2) -{ - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); -} - -static TCGv_i32 gen_add32_carry32(void) -{ - TCGv_i32 carry_32, cc_src1_32, cc_src2_32; - - /* Carry is computed from a previous add: (dst < src) */ -#if TARGET_LONG_BITS == 64 - cc_src1_32 = tcg_temp_new_i32(); - cc_src2_32 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(cc_src1_32, cpu_cc_dst); - tcg_gen_extrl_i64_i32(cc_src2_32, cpu_cc_src); -#else - cc_src1_32 = cpu_cc_dst; - cc_src2_32 = cpu_cc_src; -#endif - - carry_32 = tcg_temp_new_i32(); - tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32); - - return carry_32; -} - -static TCGv_i32 gen_sub32_carry32(void) -{ - TCGv_i32 carry_32, cc_src1_32, cc_src2_32; - - /* Carry is computed from a previous borrow: (src1 < src2) */ -#if TARGET_LONG_BITS == 64 - cc_src1_32 = tcg_temp_new_i32(); - cc_src2_32 = tcg_temp_new_i32(); - 
tcg_gen_extrl_i64_i32(cc_src1_32, cpu_cc_src); - tcg_gen_extrl_i64_i32(cc_src2_32, cpu_cc_src2); -#else - cc_src1_32 = cpu_cc_src; - cc_src2_32 = cpu_cc_src2; -#endif - - carry_32 = tcg_temp_new_i32(); - tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32); - - return carry_32; -} - -static void gen_op_addc_int(TCGv dst, TCGv src1, TCGv src2, - TCGv_i32 carry_32, bool update_cc) +static TCGv gen_carry32(void) { - tcg_gen_add_tl(dst, src1, src2); - -#ifdef TARGET_SPARC64 - TCGv carry = tcg_temp_new(); - tcg_gen_extu_i32_tl(carry, carry_32); - tcg_gen_add_tl(dst, dst, carry); -#else - tcg_gen_add_i32(dst, dst, carry_32); -#endif - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); + if (TARGET_LONG_BITS == 64) { + TCGv t = tcg_temp_new(); + tcg_gen_extract_tl(t, cpu_icc_C, 32, 1); + return t; } + return cpu_icc_C; } -static void gen_op_addc_int_add(TCGv dst, TCGv src1, TCGv src2, bool update_cc) +static void gen_op_addcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) { - TCGv discard; + TCGv z = tcg_constant_tl(0); - if (TARGET_LONG_BITS == 64) { - gen_op_addc_int(dst, src1, src2, gen_add32_carry32(), update_cc); - return; + if (cin) { + tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); + tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + } else { + tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } - - /* - * We can re-use the host's hardware carry generation by using - * an ADD2 opcode. We discard the low part of the output. - * Ideally we'd combine this operation with the add that - * generated the carry in the first place. - */ - discard = tcg_temp_new(); - tcg_gen_add2_tl(discard, dst, cpu_cc_src, src1, cpu_cc_src2, src2); - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); + tcg_gen_xor_tl(cpu_cc_Z, src1, src2); + tcg_gen_xor_tl(cpu_cc_V, cpu_cc_N, src2); + tcg_gen_andc_tl(cpu_cc_V, cpu_cc_V, cpu_cc_Z); + if (TARGET_LONG_BITS == 64) { + /* + * Carry-in to bit 32 is result ^ src1 ^ src2. + * We already have the src xor term in Z, from computation of V. + */ + tcg_gen_xor_tl(cpu_icc_C, cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); } + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(dst, cpu_cc_N); } -static void gen_op_addc_add(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_addcc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_addc_int_add(dst, src1, src2, false); + gen_op_addcc_int(dst, src1, src2, NULL); } -static void gen_op_addccc_add(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_taddcc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_addc_int_add(dst, src1, src2, true); -} - -static void gen_op_addc_sub(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_addc_int(dst, src1, src2, gen_sub32_carry32(), false); -} + TCGv t = tcg_temp_new(); -static void gen_op_addccc_sub(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_addc_int(dst, src1, src2, gen_sub32_carry32(), true); -} + /* Save the tag bits around modification of dst. 
*/ + tcg_gen_or_tl(t, src1, src2); -static void gen_op_addc_int_generic(TCGv dst, TCGv src1, TCGv src2, - bool update_cc) -{ - TCGv_i32 carry_32 = tcg_temp_new_i32(); - gen_helper_compute_C_icc(carry_32, tcg_env); - gen_op_addc_int(dst, src1, src2, carry_32, update_cc); -} + gen_op_addcc(dst, src1, src2); -static void gen_op_addc_generic(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_addc_int_generic(dst, src1, src2, false); + /* Incorprate tag bits into icc.V */ + tcg_gen_andi_tl(t, t, 3); + tcg_gen_neg_tl(t, t); + tcg_gen_ext32u_tl(t, t); + tcg_gen_or_tl(cpu_cc_V, cpu_cc_V, t); } -static void gen_op_addccc_generic(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_addc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_addc_int_generic(dst, src1, src2, true); + tcg_gen_add_tl(dst, src1, src2); + tcg_gen_add_tl(dst, dst, gen_carry32()); } -static void gen_op_sub_cc(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_addccc(TCGv dst, TCGv src1, TCGv src2) { - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); + gen_op_addcc_int(dst, src1, src2, gen_carry32()); } -static void gen_op_subc_int(TCGv dst, TCGv src1, TCGv src2, - TCGv_i32 carry_32, bool update_cc) +static void gen_op_subcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) { - TCGv carry; - -#if TARGET_LONG_BITS == 64 - carry = tcg_temp_new(); - tcg_gen_extu_i32_i64(carry, carry_32); -#else - carry = carry_32; -#endif + TCGv z = tcg_constant_tl(0); - tcg_gen_sub_tl(dst, src1, src2); - tcg_gen_sub_tl(dst, dst, carry); - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); + if (cin) { + tcg_gen_sub2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); + tcg_gen_sub2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + } else { + tcg_gen_sub2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } + tcg_gen_neg_tl(cpu_cc_C, cpu_cc_C); + tcg_gen_xor_tl(cpu_cc_Z, src1, src2); + tcg_gen_xor_tl(cpu_cc_V, cpu_cc_N, src1); + tcg_gen_and_tl(cpu_cc_V, cpu_cc_V, cpu_cc_Z); +#ifdef TARGET_SPARC64 + tcg_gen_xor_tl(cpu_icc_C, cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); +#endif + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(dst, cpu_cc_N); } -static void gen_op_subc_add(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_subc_int(dst, src1, src2, gen_add32_carry32(), false); -} - -static void gen_op_subccc_add(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_subc_int(dst, src1, src2, gen_add32_carry32(), true); -} - -static void gen_op_subc_int_sub(TCGv dst, TCGv src1, TCGv src2, bool update_cc) +static void gen_op_subcc(TCGv dst, TCGv src1, TCGv src2) { - TCGv discard; - - if (TARGET_LONG_BITS == 64) { - gen_op_subc_int(dst, src1, src2, gen_sub32_carry32(), update_cc); - return; - } - - /* - * We can re-use the host's hardware carry generation by using - * a SUB2 opcode. We discard the low part of the output. 
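The flag identities that gen_op_addcc_int relies on earlier in this file (icc carry out of bit 31 equals bit 32 of dst ^ src1 ^ src2; overflow is (dst ^ src2) & ~(src1 ^ src2)) are easy to sanity-check with plain integer arithmetic. A minimal standalone check, not QEMU code:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t src1 = 0xffffffffull;   /* -1 as a 32-bit operand */
        uint64_t src2 = 1;
        uint64_t dst = src1 + src2;      /* full-width sum, as add2 produces */

        /* Carry into bit 32 == carry out of the 32-bit (icc) addition. */
        assert(((dst ^ src1 ^ src2) >> 32) & 1);

        /* -1 + 1 does not overflow 32-bit signed arithmetic, so icc.V is clear. */
        uint64_t v = (dst ^ src2) & ~(src1 ^ src2);
        assert(((v >> 31) & 1) == 0);
        return 0;
    }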
- */ - discard = tcg_temp_new(); - tcg_gen_sub2_tl(discard, dst, cpu_cc_src, src1, cpu_cc_src2, src2); - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - } + gen_op_subcc_int(dst, src1, src2, NULL); } -static void gen_op_subc_sub(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_tsubcc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_subc_int_sub(dst, src1, src2, false); -} + TCGv t = tcg_temp_new(); -static void gen_op_subccc_sub(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_subc_int_sub(dst, src1, src2, true); -} + /* Save the tag bits around modification of dst. */ + tcg_gen_or_tl(t, src1, src2); -static void gen_op_subc_int_generic(TCGv dst, TCGv src1, TCGv src2, - bool update_cc) -{ - TCGv_i32 carry_32 = tcg_temp_new_i32(); + gen_op_subcc(dst, src1, src2); - gen_helper_compute_C_icc(carry_32, tcg_env); - gen_op_subc_int(dst, src1, src2, carry_32, update_cc); + /* Incorprate tag bits into icc.V */ + tcg_gen_andi_tl(t, t, 3); + tcg_gen_neg_tl(t, t); + tcg_gen_ext32u_tl(t, t); + tcg_gen_or_tl(cpu_cc_V, cpu_cc_V, t); } -static void gen_op_subc_generic(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_subc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_subc_int_generic(dst, src1, src2, false); + tcg_gen_sub_tl(dst, src1, src2); + tcg_gen_sub_tl(dst, dst, gen_carry32()); } -static void gen_op_subccc_generic(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_subccc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_subc_int_generic(dst, src1, src2, true); + gen_op_subcc_int(dst, src1, src2, gen_carry32()); } static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2) { - TCGv r_temp, zero, t0; + TCGv zero = tcg_constant_tl(0); + TCGv t_src1 = tcg_temp_new(); + TCGv t_src2 = tcg_temp_new(); + TCGv t0 = tcg_temp_new(); - r_temp = tcg_temp_new(); - t0 = tcg_temp_new(); + tcg_gen_ext32u_tl(t_src1, src1); + tcg_gen_ext32u_tl(t_src2, src2); - /* old op: - if (!(env->y & 1)) - T1 = 0; - */ - zero = tcg_constant_tl(0); - tcg_gen_andi_tl(cpu_cc_src, src1, 0xffffffff); - tcg_gen_andi_tl(r_temp, cpu_y, 0x1); - tcg_gen_andi_tl(cpu_cc_src2, src2, 0xffffffff); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_cc_src2, r_temp, zero, - zero, cpu_cc_src2); + /* + * if (!(env->y & 1)) + * src2 = 0; + */ + tcg_gen_andi_tl(t0, cpu_y, 0x1); + tcg_gen_movcond_tl(TCG_COND_EQ, t_src2, t0, zero, zero, t_src2); - // b2 = T0 & 1; - // env->y = (b2 << 31) | (env->y >> 1); + /* + * b2 = src1 & 1; + * y = (b2 << 31) | (y >> 1); + */ tcg_gen_extract_tl(t0, cpu_y, 1, 31); - tcg_gen_deposit_tl(cpu_y, t0, cpu_cc_src, 31, 1); + tcg_gen_deposit_tl(cpu_y, t0, src1, 31, 1); // b1 = N ^ V; - gen_mov_reg_N(t0, cpu_psr); - gen_mov_reg_V(r_temp, cpu_psr); - tcg_gen_xor_tl(t0, t0, r_temp); + tcg_gen_xor_tl(t0, cpu_cc_N, cpu_cc_V); - // T0 = (b1 << 31) | (T0 >> 1); - // src1 = T0; - tcg_gen_shli_tl(t0, t0, 31); - tcg_gen_shri_tl(cpu_cc_src, cpu_cc_src, 1); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2); + /* + * src1 = (b1 << 31) | (src1 >> 1) + */ + tcg_gen_andi_tl(t0, t0, 1u << 31); + tcg_gen_shri_tl(t_src1, t_src1, 1); + tcg_gen_or_tl(t_src1, t_src1, t0); - tcg_gen_mov_tl(dst, cpu_cc_dst); + gen_op_addcc(dst, t_src1, t_src2); } static void gen_op_multiply(TCGv dst, TCGv src1, TCGv src2, int sign_ext) @@ -692,34 +577,66 @@ static void gen_op_smul(TCGv dst, TCGv src1, TCGv src2) gen_op_multiply(dst, src1, src2, 1); } -static void gen_op_udivx(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_udivx(dst, tcg_env, src1, src2); -} - 
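With the udivx/sdivx wrappers gone, 32-bit division goes through the reworked helper_udiv/helper_sdiv (see the target/sparc/helper.c hunk earlier): they return the quotient in the low 32 bits of an i64 and signal overflow through the upper bits, which gen_op_udivcc/gen_op_sdivcc below split into cc_N and cc_V. A minimal model of that convention, with udiv_packed() as a hypothetical stand-in for the helper (divide-by-zero is trapped separately and not modelled here):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t udiv_packed(uint32_t y, uint32_t a, uint32_t b)
    {
        uint64_t dividend = ((uint64_t)y << 32) | a;
        uint64_t q = dividend / b;
        return q > UINT32_MAX ? (uint64_t)-1 : q;   /* -1 => result 0xffffffff, V set */
    }

    int main(void)
    {
        uint64_t packed = udiv_packed(1, 0, 1);     /* 0x1_0000_0000 / 1 overflows */
        uint32_t result = (uint32_t)packed;         /* what gen_op_udivcc puts in cc_N */
        uint32_t v = packed >> 32;                  /* non-zero upper half => icc.V */
        assert(result == UINT32_MAX && v != 0);

        packed = udiv_packed(0, 100, 7);            /* small quotient, no overflow */
        assert((uint32_t)packed == 14 && (packed >> 32) == 0);
        return 0;
    }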
-static void gen_op_sdivx(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_sdivx(dst, tcg_env, src1, src2); -} - -static void gen_op_udiv(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_udiv(dst, tcg_env, src1, src2); -} - static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2) { +#ifdef TARGET_SPARC64 gen_helper_sdiv(dst, tcg_env, src1, src2); + tcg_gen_ext32s_tl(dst, dst); +#else + TCGv_i64 t64 = tcg_temp_new_i64(); + gen_helper_sdiv(t64, tcg_env, src1, src2); + tcg_gen_trunc_i64_tl(dst, t64); +#endif } static void gen_op_udivcc(TCGv dst, TCGv src1, TCGv src2) { - gen_helper_udiv_cc(dst, tcg_env, src1, src2); + TCGv_i64 t64; + +#ifdef TARGET_SPARC64 + t64 = cpu_cc_V; +#else + t64 = tcg_temp_new_i64(); +#endif + + gen_helper_udiv(t64, tcg_env, src1, src2); + +#ifdef TARGET_SPARC64 + tcg_gen_ext32u_tl(cpu_cc_N, t64); + tcg_gen_shri_tl(cpu_cc_V, t64, 32); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_icc_C, 0); +#else + tcg_gen_extr_i64_tl(cpu_cc_N, cpu_cc_V, t64); +#endif + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_cc_C, 0); + tcg_gen_mov_tl(dst, cpu_cc_N); } static void gen_op_sdivcc(TCGv dst, TCGv src1, TCGv src2) { - gen_helper_sdiv_cc(dst, tcg_env, src1, src2); + TCGv_i64 t64; + +#ifdef TARGET_SPARC64 + t64 = cpu_cc_V; +#else + t64 = tcg_temp_new_i64(); +#endif + + gen_helper_sdiv(t64, tcg_env, src1, src2); + +#ifdef TARGET_SPARC64 + tcg_gen_ext32s_tl(cpu_cc_N, t64); + tcg_gen_shri_tl(cpu_cc_V, t64, 32); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_icc_C, 0); +#else + tcg_gen_extr_i64_tl(cpu_cc_N, cpu_cc_V, t64); +#endif + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_cc_C, 0); + tcg_gen_mov_tl(dst, cpu_cc_N); } static void gen_op_taddcctv(TCGv dst, TCGv src1, TCGv src2) @@ -825,114 +742,12 @@ static void gen_op_eval_ba(TCGv dst) tcg_gen_movi_tl(dst, 1); } -// Z -static void gen_op_eval_be(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_Z(dst, src); -} - -// Z | (N ^ V) -static void gen_op_eval_ble(TCGv dst, TCGv_i32 src) -{ - TCGv t0 = tcg_temp_new(); - gen_mov_reg_N(t0, src); - gen_mov_reg_V(dst, src); - tcg_gen_xor_tl(dst, dst, t0); - gen_mov_reg_Z(t0, src); - tcg_gen_or_tl(dst, dst, t0); -} - -// N ^ V -static void gen_op_eval_bl(TCGv dst, TCGv_i32 src) -{ - TCGv t0 = tcg_temp_new(); - gen_mov_reg_V(t0, src); - gen_mov_reg_N(dst, src); - tcg_gen_xor_tl(dst, dst, t0); -} - -// C | Z -static void gen_op_eval_bleu(TCGv dst, TCGv_i32 src) -{ - TCGv t0 = tcg_temp_new(); - gen_mov_reg_Z(t0, src); - gen_mov_reg_C(dst, src); - tcg_gen_or_tl(dst, dst, t0); -} - -// C -static void gen_op_eval_bcs(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_C(dst, src); -} - -// V -static void gen_op_eval_bvs(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_V(dst, src); -} - // 0 static void gen_op_eval_bn(TCGv dst) { tcg_gen_movi_tl(dst, 0); } -// N -static void gen_op_eval_bneg(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_N(dst, src); -} - -// !Z -static void gen_op_eval_bne(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_Z(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !(Z | (N ^ V)) -static void gen_op_eval_bg(TCGv dst, TCGv_i32 src) -{ - gen_op_eval_ble(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !(N ^ V) -static void gen_op_eval_bge(TCGv dst, TCGv_i32 src) -{ - gen_op_eval_bl(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !(C | Z) -static void gen_op_eval_bgu(TCGv dst, TCGv_i32 src) -{ - gen_op_eval_bleu(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !C -static void gen_op_eval_bcc(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_C(dst, src); - 
tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !N -static void gen_op_eval_bpos(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_N(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !V -static void gen_op_eval_bvc(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_V(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - /* FPSR bit field FCC1 | FCC0: 0 = @@ -1074,26 +889,26 @@ static void gen_op_eval_fbo(TCGv dst, TCGv src, unsigned int fcc_offset) tcg_gen_xori_tl(dst, dst, 0x1); } -static void gen_branch2(DisasContext *dc, target_ulong pc1, - target_ulong pc2, TCGv r_cond) +static void finishing_insn(DisasContext *dc) { - TCGLabel *l1 = gen_new_label(); - - tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1); - - gen_goto_tb(dc, 0, pc1, pc1 + 4); - - gen_set_label(l1); - gen_goto_tb(dc, 1, pc2, pc2 + 4); + /* + * From here, there is no future path through an unwinding exception. + * If the current insn cannot raise an exception, the computation of + * cpu_cond may be able to be elided. + */ + if (dc->cpu_cond_live) { + tcg_gen_discard_tl(cpu_cond); + dc->cpu_cond_live = false; + } } static void gen_generic_branch(DisasContext *dc) { TCGv npc0 = tcg_constant_tl(dc->jump_pc[0]); TCGv npc1 = tcg_constant_tl(dc->jump_pc[1]); - TCGv zero = tcg_constant_tl(0); + TCGv c2 = tcg_constant_tl(dc->jump.c2); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_npc, cpu_cond, zero, npc0, npc1); + tcg_gen_movcond_tl(dc->jump.cond, cpu_npc, dc->jump.c1, c2, npc0, npc1); } /* call this function before using the condition register as it may @@ -1125,14 +940,6 @@ static void save_npc(DisasContext *dc) } } -static void update_psr(DisasContext *dc) -{ - if (dc->cc_op != CC_OP_FLAGS) { - dc->cc_op = CC_OP_FLAGS; - gen_helper_compute_psr(tcg_env); - } -} - static void save_state(DisasContext *dc) { tcg_gen_movi_tl(cpu_pc, dc->pc); @@ -1141,6 +948,7 @@ static void save_state(DisasContext *dc) static void gen_exception(DisasContext *dc, int which) { + finishing_insn(dc); save_state(dc); gen_helper_raise_exception(tcg_env, tcg_constant_i32(which)); dc->base.is_jmp = DISAS_NORETURN; @@ -1182,6 +990,8 @@ static void gen_check_align(DisasContext *dc, TCGv addr, int mask) static void gen_mov_pc_npc(DisasContext *dc) { + finishing_insn(dc); + if (dc->npc & 3) { switch (dc->npc) { case JUMP_PC: @@ -1202,178 +1012,103 @@ static void gen_mov_pc_npc(DisasContext *dc) } } -static void gen_op_next_insn(void) -{ - tcg_gen_mov_tl(cpu_pc, cpu_npc); - tcg_gen_addi_tl(cpu_npc, cpu_npc, 4); -} - static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond, DisasContext *dc) { - static int subcc_cond[16] = { - TCG_COND_NEVER, - TCG_COND_EQ, - TCG_COND_LE, - TCG_COND_LT, - TCG_COND_LEU, - TCG_COND_LTU, - -1, /* neg */ - -1, /* overflow */ - TCG_COND_ALWAYS, - TCG_COND_NE, - TCG_COND_GT, - TCG_COND_GE, - TCG_COND_GTU, - TCG_COND_GEU, - -1, /* pos */ - -1, /* no overflow */ - }; + TCGv t1; - static int logic_cond[16] = { - TCG_COND_NEVER, - TCG_COND_EQ, /* eq: Z */ - TCG_COND_LE, /* le: Z | (N ^ V) -> Z | N */ - TCG_COND_LT, /* lt: N ^ V -> N */ - TCG_COND_EQ, /* leu: C | Z -> Z */ - TCG_COND_NEVER, /* ltu: C -> 0 */ - TCG_COND_LT, /* neg: N */ - TCG_COND_NEVER, /* vs: V -> 0 */ - TCG_COND_ALWAYS, - TCG_COND_NE, /* ne: !Z */ - TCG_COND_GT, /* gt: !(Z | (N ^ V)) -> !(Z | N) */ - TCG_COND_GE, /* ge: !(N ^ V) -> !N */ - TCG_COND_NE, /* gtu: !(C | Z) -> !Z */ - TCG_COND_ALWAYS, /* geu: !C -> 1 */ - TCG_COND_GE, /* pos: !N */ - TCG_COND_ALWAYS, /* vc: !V -> 1 */ - }; - - TCGv_i32 r_src; - TCGv r_dst; + cmp->c1 = t1 = tcg_temp_new(); + cmp->c2 = 0; -#ifdef TARGET_SPARC64 - if 
(xcc) { - r_src = cpu_xcc; - } else { - r_src = cpu_psr; - } -#else - r_src = cpu_psr; -#endif + switch (cond & 7) { + case 0x0: /* never */ + cmp->cond = TCG_COND_NEVER; + cmp->c1 = tcg_constant_tl(0); + break; - switch (dc->cc_op) { - case CC_OP_LOGIC: - cmp->cond = logic_cond[cond]; - do_compare_dst_0: - cmp->is_bool = false; - cmp->c2 = tcg_constant_tl(0); -#ifdef TARGET_SPARC64 - if (!xcc) { - cmp->c1 = tcg_temp_new(); - tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst); - break; + case 0x1: /* eq: Z */ + cmp->cond = TCG_COND_EQ; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_Z); + } else { + tcg_gen_ext32u_tl(t1, cpu_icc_Z); } -#endif - cmp->c1 = cpu_cc_dst; break; - case CC_OP_SUB: - switch (cond) { - case 6: /* neg */ - case 14: /* pos */ - cmp->cond = (cond == 6 ? TCG_COND_LT : TCG_COND_GE); - goto do_compare_dst_0; - - case 7: /* overflow */ - case 15: /* !overflow */ - goto do_dynamic; + case 0x2: /* le: Z | (N ^ V) */ + /* + * Simplify: + * cc_Z || (N ^ V) < 0 NE + * cc_Z && !((N ^ V) < 0) EQ + * cc_Z & ~((N ^ V) >> TLB) EQ + */ + cmp->cond = TCG_COND_EQ; + tcg_gen_xor_tl(t1, cpu_cc_N, cpu_cc_V); + tcg_gen_sextract_tl(t1, t1, xcc ? 63 : 31, 1); + tcg_gen_andc_tl(t1, xcc ? cpu_cc_Z : cpu_icc_Z, t1); + if (TARGET_LONG_BITS == 64 && !xcc) { + tcg_gen_ext32u_tl(t1, t1); + } + break; - default: - cmp->cond = subcc_cond[cond]; - cmp->is_bool = false; -#ifdef TARGET_SPARC64 - if (!xcc) { - /* Note that sign-extension works for unsigned compares as - long as both operands are sign-extended. */ - cmp->c1 = tcg_temp_new(); - cmp->c2 = tcg_temp_new(); - tcg_gen_ext32s_tl(cmp->c1, cpu_cc_src); - tcg_gen_ext32s_tl(cmp->c2, cpu_cc_src2); - break; - } -#endif - cmp->c1 = cpu_cc_src; - cmp->c2 = cpu_cc_src2; - break; + case 0x3: /* lt: N ^ V */ + cmp->cond = TCG_COND_LT; + tcg_gen_xor_tl(t1, cpu_cc_N, cpu_cc_V); + if (TARGET_LONG_BITS == 64 && !xcc) { + tcg_gen_ext32s_tl(t1, t1); } break; - default: - do_dynamic: - gen_helper_compute_psr(tcg_env); - dc->cc_op = CC_OP_FLAGS; - /* FALLTHRU */ + case 0x4: /* leu: Z | C */ + /* + * Simplify: + * cc_Z == 0 || cc_C != 0 NE + * cc_Z != 0 && cc_C == 0 EQ + * cc_Z & (cc_C ? 0 : -1) EQ + * cc_Z & (cc_C - 1) EQ + */ + cmp->cond = TCG_COND_EQ; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_subi_tl(t1, cpu_cc_C, 1); + tcg_gen_and_tl(t1, t1, cpu_cc_Z); + } else { + tcg_gen_extract_tl(t1, cpu_icc_C, 32, 1); + tcg_gen_subi_tl(t1, t1, 1); + tcg_gen_and_tl(t1, t1, cpu_icc_Z); + tcg_gen_ext32u_tl(t1, t1); + } + break; - case CC_OP_FLAGS: - /* We're going to generate a boolean result. 
*/ + case 0x5: /* ltu: C */ cmp->cond = TCG_COND_NE; - cmp->is_bool = true; - cmp->c1 = r_dst = tcg_temp_new(); - cmp->c2 = tcg_constant_tl(0); + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_C); + } else { + tcg_gen_extract_tl(t1, cpu_icc_C, 32, 1); + } + break; - switch (cond) { - case 0x0: - gen_op_eval_bn(r_dst); - break; - case 0x1: - gen_op_eval_be(r_dst, r_src); - break; - case 0x2: - gen_op_eval_ble(r_dst, r_src); - break; - case 0x3: - gen_op_eval_bl(r_dst, r_src); - break; - case 0x4: - gen_op_eval_bleu(r_dst, r_src); - break; - case 0x5: - gen_op_eval_bcs(r_dst, r_src); - break; - case 0x6: - gen_op_eval_bneg(r_dst, r_src); - break; - case 0x7: - gen_op_eval_bvs(r_dst, r_src); - break; - case 0x8: - gen_op_eval_ba(r_dst); - break; - case 0x9: - gen_op_eval_bne(r_dst, r_src); - break; - case 0xa: - gen_op_eval_bg(r_dst, r_src); - break; - case 0xb: - gen_op_eval_bge(r_dst, r_src); - break; - case 0xc: - gen_op_eval_bgu(r_dst, r_src); - break; - case 0xd: - gen_op_eval_bcc(r_dst, r_src); - break; - case 0xe: - gen_op_eval_bpos(r_dst, r_src); - break; - case 0xf: - gen_op_eval_bvc(r_dst, r_src); - break; + case 0x6: /* neg: N */ + cmp->cond = TCG_COND_LT; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_N); + } else { + tcg_gen_ext32s_tl(t1, cpu_cc_N); + } + break; + + case 0x7: /* vs: V */ + cmp->cond = TCG_COND_LT; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_V); + } else { + tcg_gen_ext32s_tl(t1, cpu_cc_V); } break; } + if (cond & 8) { + cmp->cond = tcg_invert_cond(cmp->cond); + } } static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond) @@ -1383,9 +1118,8 @@ static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond) /* For now we still generate a straight boolean result. */ cmp->cond = TCG_COND_NE; - cmp->is_bool = true; cmp->c1 = r_dst = tcg_temp_new(); - cmp->c2 = tcg_constant_tl(0); + cmp->c2 = 0; switch (cc) { default: @@ -1455,24 +1189,29 @@ static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond) } } -// Inverted logic -static const TCGCond gen_tcg_cond_reg[8] = { - TCG_COND_NEVER, /* reserved */ - TCG_COND_NE, - TCG_COND_GT, - TCG_COND_GE, - TCG_COND_NEVER, /* reserved */ - TCG_COND_EQ, - TCG_COND_LE, - TCG_COND_LT, -}; - -static void gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src) +static bool gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src) { - cmp->cond = tcg_invert_cond(gen_tcg_cond_reg[cond]); - cmp->is_bool = false; - cmp->c1 = r_src; - cmp->c2 = tcg_constant_tl(0); + static const TCGCond cond_reg[4] = { + TCG_COND_NEVER, /* reserved */ + TCG_COND_EQ, + TCG_COND_LE, + TCG_COND_LT, + }; + TCGCond tcond; + + if ((cond & 3) == 0) { + return false; + } + tcond = cond_reg[cond & 3]; + if (cond & 4) { + tcond = tcg_invert_cond(tcond); + } + + cmp->cond = tcond; + cmp->c1 = tcg_temp_new(); + cmp->c2 = 0; + tcg_gen_mov_tl(cmp->c1, r_src); + return true; } static void gen_op_clear_ieee_excp_and_FTT(void) @@ -2472,18 +2211,14 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 TCGv_i32 c32, zero, dst, s1, s2; + TCGv_i64 c64 = tcg_temp_new_i64(); /* We have two choices here: extend the 32 bit data and use movcond_i64, or fold the comparison down to 32 bits and use movcond_i32. Choose the later. 
*/ c32 = tcg_temp_new_i32(); - if (cmp->is_bool) { - tcg_gen_extrl_i64_i32(c32, cmp->c1); - } else { - TCGv_i64 c64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(cmp->cond, c64, cmp->c1, cmp->c2); - tcg_gen_extrl_i64_i32(c32, c64); - } + tcg_gen_setcondi_i64(cmp->cond, c64, cmp->c1, cmp->c2); + tcg_gen_extrl_i64_i32(c32, c64); s1 = gen_load_fpr_F(dc, rs); s2 = gen_load_fpr_F(dc, rd); @@ -2502,7 +2237,7 @@ static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 TCGv_i64 dst = gen_dest_fpr_D(dc, rd); - tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, cmp->c2, + tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, tcg_constant_tl(cmp->c2), gen_load_fpr_D(dc, rs), gen_load_fpr_D(dc, rd)); gen_store_fpr_D(dc, rd, dst); @@ -2516,10 +2251,11 @@ static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs) #ifdef TARGET_SPARC64 int qd = QFPREG(rd); int qs = QFPREG(rs); + TCGv c2 = tcg_constant_tl(cmp->c2); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, cmp->c2, + tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, c2, cpu_fpr[qs / 2], cpu_fpr[qd / 2]); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, cmp->c2, + tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, c2, cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]); gen_update_fprs_dirty(dc, qd); @@ -2599,18 +2335,34 @@ static int extract_qfpreg(DisasContext *dc, int x) /* Default case for non jump instructions. */ static bool advance_pc(DisasContext *dc) { + TCGLabel *l1; + + finishing_insn(dc); + if (dc->npc & 3) { switch (dc->npc) { case DYNAMIC_PC: case DYNAMIC_PC_LOOKUP: dc->pc = dc->npc; - gen_op_next_insn(); + tcg_gen_mov_tl(cpu_pc, cpu_npc); + tcg_gen_addi_tl(cpu_npc, cpu_npc, 4); break; + case JUMP_PC: /* we can do a static jump */ - gen_branch2(dc, dc->jump_pc[0], dc->jump_pc[1], cpu_cond); + l1 = gen_new_label(); + tcg_gen_brcondi_tl(dc->jump.cond, dc->jump.c1, dc->jump.c2, l1); + + /* jump not taken */ + gen_goto_tb(dc, 1, dc->jump_pc[1], dc->jump_pc[1] + 4); + + /* jump taken */ + gen_set_label(l1); + gen_goto_tb(dc, 0, dc->jump_pc[0], dc->jump_pc[0] + 4); + dc->base.is_jmp = DISAS_NORETURN; break; + default: g_assert_not_reached(); } @@ -2625,41 +2377,47 @@ static bool advance_pc(DisasContext *dc) * Major opcodes 00 and 01 -- branches, call, and sethi */ -static bool advance_jump_uncond_never(DisasContext *dc, bool annul) +static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, + bool annul, int disp) { - if (annul) { - dc->pc = dc->npc + 4; - dc->npc = dc->pc + 4; - } else { - dc->pc = dc->npc; - dc->npc = dc->pc + 4; + target_ulong dest = address_mask_i(dc, dc->pc + disp * 4); + target_ulong npc; + + finishing_insn(dc); + + if (cmp->cond == TCG_COND_ALWAYS) { + if (annul) { + dc->pc = dest; + dc->npc = dest + 4; + } else { + gen_mov_pc_npc(dc); + dc->npc = dest; + } + return true; } - return true; -} -static bool advance_jump_uncond_always(DisasContext *dc, bool annul, - target_ulong dest) -{ - if (annul) { - dc->pc = dest; - dc->npc = dest + 4; - } else { - dc->pc = dc->npc; - dc->npc = dest; - tcg_gen_mov_tl(cpu_pc, cpu_npc); + if (cmp->cond == TCG_COND_NEVER) { + npc = dc->npc; + if (npc & 3) { + gen_mov_pc_npc(dc); + if (annul) { + tcg_gen_addi_tl(cpu_pc, cpu_pc, 4); + } + tcg_gen_addi_tl(cpu_npc, cpu_pc, 4); + } else { + dc->pc = npc + (annul ? 
4 : 0); + dc->npc = dc->pc + 4; + } + return true; } - return true; -} -static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, - bool annul, target_ulong dest) -{ - target_ulong npc = dc->npc; + flush_cond(dc); + npc = dc->npc; if (annul) { TCGLabel *l1 = gen_new_label(); - tcg_gen_brcond_tl(tcg_invert_cond(cmp->cond), cmp->c1, cmp->c2, l1); + tcg_gen_brcondi_tl(tcg_invert_cond(cmp->cond), cmp->c1, cmp->c2, l1); gen_goto_tb(dc, 0, npc, dest); gen_set_label(l1); gen_goto_tb(dc, 1, npc + 4, npc + 8); @@ -2673,7 +2431,7 @@ static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, tcg_gen_mov_tl(cpu_pc, cpu_npc); tcg_gen_addi_tl(cpu_npc, cpu_npc, 4); tcg_gen_movcond_tl(cmp->cond, cpu_npc, - cmp->c1, cmp->c2, + cmp->c1, tcg_constant_tl(cmp->c2), tcg_constant_tl(dest), cpu_npc); dc->pc = npc; break; @@ -2682,14 +2440,18 @@ static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, } } else { dc->pc = npc; + dc->npc = JUMP_PC; + dc->jump = *cmp; dc->jump_pc[0] = dest; dc->jump_pc[1] = npc + 4; - dc->npc = JUMP_PC; - if (cmp->is_bool) { - tcg_gen_mov_tl(cpu_cond, cmp->c1); + + /* The condition for cpu_cond is always NE -- normalize. */ + if (cmp->cond == TCG_COND_NE) { + tcg_gen_xori_tl(cpu_cond, cmp->c1, cmp->c2); } else { - tcg_gen_setcond_tl(cmp->cond, cpu_cond, cmp->c1, cmp->c2); + tcg_gen_setcondi_tl(cmp->cond, cpu_cond, cmp->c1, cmp->c2); } + dc->cpu_cond_live = true; } } return true; @@ -2717,20 +2479,10 @@ static bool gen_trap_float128(DisasContext *dc) static bool do_bpcc(DisasContext *dc, arg_bcc *a) { - target_long target = address_mask_i(dc, dc->pc + a->i * 4); DisasCompare cmp; - switch (a->cond) { - case 0x0: - return advance_jump_uncond_never(dc, a->a); - case 0x8: - return advance_jump_uncond_always(dc, a->a, target); - default: - flush_cond(dc); - - gen_compare(&cmp, a->cc, a->cond, dc); - return advance_jump_cond(dc, &cmp, a->a, target); - } + gen_compare(&cmp, a->cc, a->cond, dc); + return advance_jump_cond(dc, &cmp, a->a, a->i); } TRANS(Bicc, ALL, do_bpcc, a) @@ -2738,23 +2490,13 @@ TRANS(BPcc, 64, do_bpcc, a) static bool do_fbpfcc(DisasContext *dc, arg_bcc *a) { - target_long target = address_mask_i(dc, dc->pc + a->i * 4); DisasCompare cmp; if (gen_trap_ifnofpu(dc)) { return true; } - switch (a->cond) { - case 0x0: - return advance_jump_uncond_never(dc, a->a); - case 0x8: - return advance_jump_uncond_always(dc, a->a, target); - default: - flush_cond(dc); - - gen_fcompare(&cmp, a->cc, a->cond); - return advance_jump_cond(dc, &cmp, a->a, target); - } + gen_fcompare(&cmp, a->cc, a->cond); + return advance_jump_cond(dc, &cmp, a->a, a->i); } TRANS(FBPfcc, 64, do_fbpfcc, a) @@ -2762,19 +2504,15 @@ TRANS(FBfcc, ALL, do_fbpfcc, a) static bool trans_BPr(DisasContext *dc, arg_BPr *a) { - target_long target = address_mask_i(dc, dc->pc + a->i * 4); DisasCompare cmp; if (!avail_64(dc)) { return false; } - if (gen_tcg_cond_reg[a->cond] == TCG_COND_NEVER) { + if (!gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1))) { return false; } - - flush_cond(dc); - gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1)); - return advance_jump_cond(dc, &cmp, a->a, target); + return advance_jump_cond(dc, &cmp, a->a, a->i); } static bool trans_CALL(DisasContext *dc, arg_CALL *a) @@ -2848,6 +2586,8 @@ static bool do_tcc(DisasContext *dc, int cond, int cc, tcg_gen_addi_i32(trap, trap, TT_TRAP); } + finishing_insn(dc); + /* Trap always. 
*/ if (cond == 8) { save_state(dc); @@ -2860,7 +2600,7 @@ static bool do_tcc(DisasContext *dc, int cond, int cc, flush_cond(dc); lab = delay_exceptionv(dc, trap); gen_compare(&cmp, cc, cond, dc); - tcg_gen_brcond_tl(cmp.cond, cmp.c1, cmp.c2, lab); + tcg_gen_brcondi_tl(cmp.cond, cmp.c1, cmp.c2, lab); return advance_pc(dc); } @@ -2957,7 +2697,6 @@ TRANS(RDASR17, ASR17, do_rd_special, true, a->rd, do_rd_leon3_config) static TCGv do_rdccr(DisasContext *dc, TCGv dst) { - update_psr(dc); gen_helper_rdccr(dst, tcg_env); return dst; } @@ -3070,7 +2809,6 @@ TRANS(RDSTRAND_STATUS, HYPV, do_rd_special, true, a->rd, do_rdstrand_status) static TCGv do_rdpsr(DisasContext *dc, TCGv dst) { - update_psr(dc); gen_helper_rdpsr(dst, tcg_env); return dst; } @@ -3466,6 +3204,7 @@ TRANS(WRSTICK_CMPR, 64, do_wr_special, a, supervisor(dc), do_wrstick_cmpr) static void do_wrpowerdown(DisasContext *dc, TCGv src) { + finishing_insn(dc); save_state(dc); gen_helper_power_down(tcg_env); } @@ -3475,8 +3214,6 @@ TRANS(WRPOWERDOWN, POWERDOWN, do_wr_special, a, supervisor(dc), do_wrpowerdown) static void do_wrpsr(DisasContext *dc, TCGv src) { gen_helper_wrpsr(tcg_env, src); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS); - dc->cc_op = CC_OP_FLAGS; dc->base.is_jmp = DISAS_EXIT; } @@ -3740,9 +3477,10 @@ static bool trans_NOP(DisasContext *dc, arg_NOP *a) TRANS(NOP_v7, 32, trans_NOP, a) TRANS(NOP_v9, 64, trans_NOP, a) -static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, +static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, void (*func)(TCGv, TCGv, TCGv), - void (*funci)(TCGv, TCGv, target_long)) + void (*funci)(TCGv, TCGv, target_long), + bool logic_cc) { TCGv dst, src1; @@ -3751,8 +3489,8 @@ static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, return false; } - if (a->cc) { - dst = cpu_cc_dst; + if (logic_cc) { + dst = cpu_cc_N; } else { dst = gen_dest_gpr(dc, a->rd); } @@ -3767,43 +3505,48 @@ static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, } else { func(dst, src1, cpu_regs[a->rs2_or_imm]); } - gen_store_gpr(dc, a->rd, dst); - if (a->cc) { - tcg_gen_movi_i32(cpu_cc_op, cc_op); - dc->cc_op = cc_op; + if (logic_cc) { + if (TARGET_LONG_BITS == 64) { + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_icc_C, 0); + } + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_cc_C, 0); + tcg_gen_movi_tl(cpu_cc_V, 0); } + + gen_store_gpr(dc, a->rd, dst); return advance_pc(dc); } -static bool do_arith(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, +static bool do_arith(DisasContext *dc, arg_r_r_ri_cc *a, void (*func)(TCGv, TCGv, TCGv), void (*funci)(TCGv, TCGv, target_long), void (*func_cc)(TCGv, TCGv, TCGv)) { if (a->cc) { - assert(cc_op >= 0); - return do_arith_int(dc, a, cc_op, func_cc, NULL); + return do_arith_int(dc, a, func_cc, NULL, false); } - return do_arith_int(dc, a, cc_op, func, funci); + return do_arith_int(dc, a, func, funci, false); } static bool do_logic(DisasContext *dc, arg_r_r_ri_cc *a, void (*func)(TCGv, TCGv, TCGv), void (*funci)(TCGv, TCGv, target_long)) { - return do_arith_int(dc, a, CC_OP_LOGIC, func, funci); + return do_arith_int(dc, a, func, funci, a->cc); } -TRANS(ADD, ALL, do_arith, a, CC_OP_ADD, - tcg_gen_add_tl, tcg_gen_addi_tl, gen_op_add_cc) -TRANS(SUB, ALL, do_arith, a, CC_OP_SUB, - tcg_gen_sub_tl, tcg_gen_subi_tl, gen_op_sub_cc) +TRANS(ADD, ALL, do_arith, a, tcg_gen_add_tl, tcg_gen_addi_tl, gen_op_addcc) +TRANS(SUB, ALL, do_arith, a, tcg_gen_sub_tl, tcg_gen_subi_tl, gen_op_subcc) +TRANS(ADDC, ALL, do_arith, a, gen_op_addc, 
NULL, gen_op_addccc) +TRANS(SUBC, ALL, do_arith, a, gen_op_subc, NULL, gen_op_subccc) -TRANS(TADDcc, ALL, do_arith, a, CC_OP_TADD, NULL, NULL, gen_op_add_cc) -TRANS(TSUBcc, ALL, do_arith, a, CC_OP_TSUB, NULL, NULL, gen_op_sub_cc) -TRANS(TADDccTV, ALL, do_arith, a, CC_OP_TADDTV, NULL, NULL, gen_op_taddcctv) -TRANS(TSUBccTV, ALL, do_arith, a, CC_OP_TSUBTV, NULL, NULL, gen_op_tsubcctv) +TRANS(TADDcc, ALL, do_arith, a, NULL, NULL, gen_op_taddcc) +TRANS(TSUBcc, ALL, do_arith, a, NULL, NULL, gen_op_tsubcc) +TRANS(TADDccTV, ALL, do_arith, a, NULL, NULL, gen_op_taddcctv) +TRANS(TSUBccTV, ALL, do_arith, a, NULL, NULL, gen_op_tsubcctv) TRANS(AND, ALL, do_logic, a, tcg_gen_and_tl, tcg_gen_andi_tl) TRANS(XOR, ALL, do_logic, a, tcg_gen_xor_tl, tcg_gen_xori_tl) @@ -3811,17 +3554,16 @@ TRANS(ANDN, ALL, do_logic, a, tcg_gen_andc_tl, NULL) TRANS(ORN, ALL, do_logic, a, tcg_gen_orc_tl, NULL) TRANS(XORN, ALL, do_logic, a, tcg_gen_eqv_tl, NULL) -TRANS(MULX, 64, do_arith, a, -1, tcg_gen_mul_tl, tcg_gen_muli_tl, NULL) +TRANS(MULX, 64, do_arith, a, tcg_gen_mul_tl, tcg_gen_muli_tl, NULL) TRANS(UMUL, MUL, do_logic, a, gen_op_umul, NULL) TRANS(SMUL, MUL, do_logic, a, gen_op_smul, NULL) +TRANS(MULScc, ALL, do_arith, a, NULL, NULL, gen_op_mulscc) -TRANS(UDIVX, 64, do_arith, a, -1, gen_op_udivx, NULL, NULL) -TRANS(SDIVX, 64, do_arith, a, -1, gen_op_sdivx, NULL, NULL) -TRANS(UDIV, DIV, do_arith, a, CC_OP_DIV, gen_op_udiv, NULL, gen_op_udivcc) -TRANS(SDIV, DIV, do_arith, a, CC_OP_DIV, gen_op_sdiv, NULL, gen_op_sdivcc) +TRANS(UDIVcc, DIV, do_arith, a, NULL, NULL, gen_op_udivcc) +TRANS(SDIV, DIV, do_arith, a, gen_op_sdiv, NULL, gen_op_sdivcc) /* TODO: Should have feature bit -- comes in with UltraSparc T2. */ -TRANS(POPC, 64, do_arith, a, -1, gen_op_popc, NULL, NULL) +TRANS(POPC, 64, do_arith, a, gen_op_popc, NULL, NULL) static bool trans_OR(DisasContext *dc, arg_r_r_ri_cc *a) { @@ -3840,58 +3582,152 @@ static bool trans_OR(DisasContext *dc, arg_r_r_ri_cc *a) return do_logic(dc, a, tcg_gen_or_tl, tcg_gen_ori_tl); } -static bool trans_ADDC(DisasContext *dc, arg_r_r_ri_cc *a) -{ - switch (dc->cc_op) { - case CC_OP_DIV: - case CC_OP_LOGIC: - /* Carry is known to be zero. Fall back to plain ADD. */ - return do_arith(dc, a, CC_OP_ADD, - tcg_gen_add_tl, tcg_gen_addi_tl, gen_op_add_cc); - case CC_OP_ADD: - case CC_OP_TADD: - case CC_OP_TADDTV: - return do_arith(dc, a, CC_OP_ADDX, - gen_op_addc_add, NULL, gen_op_addccc_add); - case CC_OP_SUB: - case CC_OP_TSUB: - case CC_OP_TSUBTV: - return do_arith(dc, a, CC_OP_ADDX, - gen_op_addc_sub, NULL, gen_op_addccc_sub); - default: - return do_arith(dc, a, CC_OP_ADDX, - gen_op_addc_generic, NULL, gen_op_addccc_generic); - } -} - -static bool trans_SUBC(DisasContext *dc, arg_r_r_ri_cc *a) -{ - switch (dc->cc_op) { - case CC_OP_DIV: - case CC_OP_LOGIC: - /* Carry is known to be zero. Fall back to plain SUB. */ - return do_arith(dc, a, CC_OP_SUB, - tcg_gen_sub_tl, tcg_gen_subi_tl, gen_op_sub_cc); - case CC_OP_ADD: - case CC_OP_TADD: - case CC_OP_TADDTV: - return do_arith(dc, a, CC_OP_SUBX, - gen_op_subc_add, NULL, gen_op_subccc_add); - case CC_OP_SUB: - case CC_OP_TSUB: - case CC_OP_TSUBTV: - return do_arith(dc, a, CC_OP_SUBX, - gen_op_subc_sub, NULL, gen_op_subccc_sub); - default: - return do_arith(dc, a, CC_OP_SUBX, - gen_op_subc_generic, NULL, gen_op_subccc_generic); +static bool trans_UDIV(DisasContext *dc, arg_r_r_ri *a) +{ + TCGv_i64 t1, t2; + TCGv dst; + + if (!avail_DIV(dc)) { + return false; + } + /* For simplicity, we under-decoded the rs2 form. 
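Later in this trans_UDIV hunk the 64-bit dividend is formed as y:rs1 (tcg_gen_concat_tl_i64) and the quotient is clamped with tcg_gen_umin_i64. The arithmetic it implements, checked standalone (a sketch, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t y = 1, rs1 = 0, rs2 = 2;
        uint64_t dividend = ((uint64_t)y << 32) | rs1;   /* concat_tl_i64(rs1, y) */
        uint64_t q = dividend / rs2;
        assert(q == 0x80000000u);                        /* fits in 32 bits */

        rs2 = 1;                                         /* quotient no longer fits */
        q = dividend / rs2;
        if (q > UINT32_MAX) {
            q = UINT32_MAX;                              /* the umin clamp */
        }
        assert(q == UINT32_MAX);
        return 0;
    }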
*/ + if (!a->imm && a->rs2_or_imm & ~0x1f) { + return false; + } + + if (unlikely(a->rs2_or_imm == 0)) { + gen_exception(dc, TT_DIV_ZERO); + return true; + } + + if (a->imm) { + t2 = tcg_constant_i64((uint32_t)a->rs2_or_imm); + } else { + TCGLabel *lab; + TCGv_i32 n2; + + finishing_insn(dc); + flush_cond(dc); + + n2 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(n2, cpu_regs[a->rs2_or_imm]); + + lab = delay_exception(dc, TT_DIV_ZERO); + tcg_gen_brcondi_i32(TCG_COND_EQ, n2, 0, lab); + + t2 = tcg_temp_new_i64(); +#ifdef TARGET_SPARC64 + tcg_gen_ext32u_i64(t2, cpu_regs[a->rs2_or_imm]); +#else + tcg_gen_extu_i32_i64(t2, cpu_regs[a->rs2_or_imm]); +#endif } + + t1 = tcg_temp_new_i64(); + tcg_gen_concat_tl_i64(t1, gen_load_gpr(dc, a->rs1), cpu_y); + + tcg_gen_divu_i64(t1, t1, t2); + tcg_gen_umin_i64(t1, t1, tcg_constant_i64(UINT32_MAX)); + + dst = gen_dest_gpr(dc, a->rd); + tcg_gen_trunc_i64_tl(dst, t1); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); } -static bool trans_MULScc(DisasContext *dc, arg_r_r_ri_cc *a) +static bool trans_UDIVX(DisasContext *dc, arg_r_r_ri *a) { - update_psr(dc); - return do_arith(dc, a, CC_OP_ADD, NULL, NULL, gen_op_mulscc); + TCGv dst, src1, src2; + + if (!avail_64(dc)) { + return false; + } + /* For simplicity, we under-decoded the rs2 form. */ + if (!a->imm && a->rs2_or_imm & ~0x1f) { + return false; + } + + if (unlikely(a->rs2_or_imm == 0)) { + gen_exception(dc, TT_DIV_ZERO); + return true; + } + + if (a->imm) { + src2 = tcg_constant_tl(a->rs2_or_imm); + } else { + TCGLabel *lab; + + finishing_insn(dc); + flush_cond(dc); + + lab = delay_exception(dc, TT_DIV_ZERO); + src2 = cpu_regs[a->rs2_or_imm]; + tcg_gen_brcondi_tl(TCG_COND_EQ, src2, 0, lab); + } + + dst = gen_dest_gpr(dc, a->rd); + src1 = gen_load_gpr(dc, a->rs1); + + tcg_gen_divu_tl(dst, src1, src2); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); +} + +static bool trans_SDIVX(DisasContext *dc, arg_r_r_ri *a) +{ + TCGv dst, src1, src2; + + if (!avail_64(dc)) { + return false; + } + /* For simplicity, we under-decoded the rs2 form. */ + if (!a->imm && a->rs2_or_imm & ~0x1f) { + return false; + } + + if (unlikely(a->rs2_or_imm == 0)) { + gen_exception(dc, TT_DIV_ZERO); + return true; + } + + dst = gen_dest_gpr(dc, a->rd); + src1 = gen_load_gpr(dc, a->rs1); + + if (a->imm) { + if (unlikely(a->rs2_or_imm == -1)) { + tcg_gen_neg_tl(dst, src1); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); + } + src2 = tcg_constant_tl(a->rs2_or_imm); + } else { + TCGLabel *lab; + TCGv t1, t2; + + finishing_insn(dc); + flush_cond(dc); + + lab = delay_exception(dc, TT_DIV_ZERO); + src2 = cpu_regs[a->rs2_or_imm]; + tcg_gen_brcondi_tl(TCG_COND_EQ, src2, 0, lab); + + /* + * Need to avoid INT64_MIN / -1, which will trap on x86 host. + * Set SRC2 to 1 as a new divisor, to produce the correct result. 
+ */ + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src1, (target_long)INT64_MIN); + tcg_gen_setcondi_tl(TCG_COND_EQ, t2, src2, -1); + tcg_gen_and_tl(t1, t1, t2); + tcg_gen_movcond_tl(TCG_COND_NE, t1, t1, tcg_constant_tl(0), + tcg_constant_tl(1), src2); + src2 = t1; + } + + tcg_gen_div_tl(dst, src1, src2); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); } static bool gen_edge(DisasContext *dc, arg_r_r_r *a, @@ -3906,11 +3742,7 @@ static bool gen_edge(DisasContext *dc, arg_r_r_r *a, s2 = gen_load_gpr(dc, a->rs2); if (cc) { - tcg_gen_mov_tl(cpu_cc_src, s1); - tcg_gen_mov_tl(cpu_cc_src2, s2); - tcg_gen_sub_tl(cpu_cc_dst, s1, s2); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB); - dc->cc_op = CC_OP_SUB; + gen_op_subcc(cpu_cc_N, s1, s2); } /* @@ -4152,8 +3984,9 @@ static TCGv gen_rs2_or_imm(DisasContext *dc, bool imm, int rs2_or_imm) static bool do_mov_cond(DisasContext *dc, DisasCompare *cmp, int rd, TCGv src2) { TCGv dst = gen_load_gpr(dc, rd); + TCGv c2 = tcg_constant_tl(cmp->c2); - tcg_gen_movcond_tl(cmp->cond, dst, cmp->c1, cmp->c2, src2, dst); + tcg_gen_movcond_tl(cmp->cond, dst, cmp->c1, c2, src2, dst); gen_store_gpr(dc, rd, dst); return advance_pc(dc); } @@ -4190,7 +4023,9 @@ static bool trans_MOVR(DisasContext *dc, arg_MOVR *a) if (src2 == NULL) { return false; } - gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1)); + if (!gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1))) { + return false; + } return do_mov_cond(dc, &cmp, a->rd, src2); } @@ -5177,6 +5012,9 @@ static bool do_fmovr(DisasContext *dc, arg_FMOVRs *a, bool is_128, { DisasCompare cmp; + if (!gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1))) { + return false; + } if (gen_trap_ifnofpu(dc)) { return true; } @@ -5185,7 +5023,6 @@ static bool do_fmovr(DisasContext *dc, arg_FMOVRs *a, bool is_128, } gen_op_clear_ieee_excp_and_FTT(); - gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1)); func(dc, &cmp, a->rd, a->rs2); return advance_pc(dc); } @@ -5322,7 +5159,6 @@ static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->pc = dc->base.pc_first; dc->npc = (target_ulong)dc->base.tb->cs_base; - dc->cc_op = CC_OP_DYNAMIC; dc->mem_idx = dc->base.tb->flags & TB_FLAG_MMU_MASK; dc->def = &env->def; dc->fpu_enabled = tb_fpu_enabled(dc->base.tb->flags); @@ -5398,6 +5234,8 @@ static void sparc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) DisasDelayException *e, *e_next; bool may_lookup; + finishing_insn(dc); + switch (dc->base.is_jmp) { case DISAS_NEXT: case DISAS_TOO_MANY: @@ -5511,23 +5349,17 @@ void sparc_tcg_init(void) "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", }; - static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = { -#ifdef TARGET_SPARC64 - { &cpu_xcc, offsetof(CPUSPARCState, xcc), "xcc" }, - { &cpu_fprs, offsetof(CPUSPARCState, fprs), "fprs" }, -#endif - { &cpu_cc_op, offsetof(CPUSPARCState, cc_op), "cc_op" }, - { &cpu_psr, offsetof(CPUSPARCState, psr), "psr" }, - }; - static const struct { TCGv *ptr; int off; const char *name; } rtl[] = { #ifdef TARGET_SPARC64 { &cpu_gsr, offsetof(CPUSPARCState, gsr), "gsr" }, + { &cpu_xcc_Z, offsetof(CPUSPARCState, xcc_Z), "xcc_Z" }, + { &cpu_xcc_C, offsetof(CPUSPARCState, xcc_C), "xcc_C" }, #endif + { &cpu_cc_N, offsetof(CPUSPARCState, cc_N), "cc_N" }, + { &cpu_cc_V, offsetof(CPUSPARCState, cc_V), "cc_V" }, + { &cpu_icc_Z, offsetof(CPUSPARCState, icc_Z), "icc_Z" }, + { &cpu_icc_C, offsetof(CPUSPARCState, icc_C), "icc_C" }, { &cpu_cond, offsetof(CPUSPARCState, cond), 
"cond" }, - { &cpu_cc_src, offsetof(CPUSPARCState, cc_src), "cc_src" }, - { &cpu_cc_src2, offsetof(CPUSPARCState, cc_src2), "cc_src2" }, - { &cpu_cc_dst, offsetof(CPUSPARCState, cc_dst), "cc_dst" }, { &cpu_fsr, offsetof(CPUSPARCState, fsr), "fsr" }, { &cpu_pc, offsetof(CPUSPARCState, pc), "pc" }, { &cpu_npc, offsetof(CPUSPARCState, npc), "npc" }, @@ -5541,10 +5373,6 @@ void sparc_tcg_init(void) offsetof(CPUSPARCState, regwptr), "regwptr"); - for (i = 0; i < ARRAY_SIZE(r32); ++i) { - *r32[i].ptr = tcg_global_mem_new_i32(tcg_env, r32[i].off, r32[i].name); - } - for (i = 0; i < ARRAY_SIZE(rtl); ++i) { *rtl[i].ptr = tcg_global_mem_new(tcg_env, rtl[i].off, rtl[i].name); } @@ -5567,6 +5395,11 @@ void sparc_tcg_init(void) offsetof(CPUSPARCState, fpr[i]), fregnames[i]); } + +#ifdef TARGET_SPARC64 + cpu_fprs = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUSPARCState, fprs), "fprs"); +#endif } void sparc_restore_state_to_opc(CPUState *cs, diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c index 3a7c0ff943..16d1c70fe7 100644 --- a/target/sparc/win_helper.c +++ b/target/sparc/win_helper.c @@ -53,23 +53,47 @@ void cpu_set_cwp(CPUSPARCState *env, int new_cwp) target_ulong cpu_get_psr(CPUSPARCState *env) { - helper_compute_psr(env); + target_ulong icc = 0; + + icc |= ((int32_t)env->cc_N < 0) << PSR_NEG_SHIFT; + icc |= ((int32_t)env->cc_V < 0) << PSR_OVF_SHIFT; + icc |= ((int32_t)env->icc_Z == 0) << PSR_ZERO_SHIFT; + if (TARGET_LONG_BITS == 64) { + icc |= extract64(env->icc_C, 32, 1) << PSR_CARRY_SHIFT; + } else { + icc |= env->icc_C << PSR_CARRY_SHIFT; + } #if !defined(TARGET_SPARC64) - return env->version | (env->psr & PSR_ICC) | + return env->version | icc | (env->psref ? PSR_EF : 0) | (env->psrpil << 8) | (env->psrs ? PSR_S : 0) | (env->psrps ? PSR_PS : 0) | (env->psret ? PSR_ET : 0) | env->cwp; #else - return env->psr & PSR_ICC; + return icc; #endif } +void cpu_put_psr_icc(CPUSPARCState *env, target_ulong val) +{ + if (TARGET_LONG_BITS == 64) { + /* Do not clobber xcc.[NV] */ + env->cc_N = deposit64(env->cc_N, 0, 32, -(val & PSR_NEG)); + env->cc_V = deposit64(env->cc_V, 0, 32, -(val & PSR_OVF)); + env->icc_C = -(val & PSR_CARRY); + } else { + env->cc_N = -(val & PSR_NEG); + env->cc_V = -(val & PSR_OVF); + env->icc_C = (val >> PSR_CARRY_SHIFT) & 1; + } + env->icc_Z = ~val & PSR_ZERO; +} + void cpu_put_psr_raw(CPUSPARCState *env, target_ulong val) { - env->psr = val & PSR_ICC; + cpu_put_psr_icc(env, val); #if !defined(TARGET_SPARC64) env->psref = (val & PSR_EF) ? 1 : 0; env->psrpil = (val & PSR_PIL) >> 8; @@ -77,7 +101,6 @@ void cpu_put_psr_raw(CPUSPARCState *env, target_ulong val) env->psrps = (val & PSR_PS) ? 1 : 0; env->psret = (val & PSR_ET) ? 
1 : 0; #endif - env->cc_op = CC_OP_FLAGS; #if !defined(TARGET_SPARC64) cpu_set_cwp(env, val & PSR_CWP); #endif @@ -244,18 +267,29 @@ void helper_restored(CPUSPARCState *env) target_ulong cpu_get_ccr(CPUSPARCState *env) { - target_ulong psr; + target_ulong ccr = 0; + + ccr |= (env->icc_C >> 32) & 1; + ccr |= ((int32_t)env->cc_V < 0) << 1; + ccr |= ((int32_t)env->icc_Z == 0) << 2; + ccr |= ((int32_t)env->cc_N < 0) << 3; - psr = cpu_get_psr(env); + ccr |= env->xcc_C << 4; + ccr |= (env->cc_V < 0) << 5; + ccr |= (env->xcc_Z == 0) << 6; + ccr |= (env->cc_N < 0) << 7; - return ((env->xcc >> 20) << 4) | ((psr & PSR_ICC) >> 20); + return ccr; } void cpu_put_ccr(CPUSPARCState *env, target_ulong val) { - env->xcc = (val >> 4) << 20; - env->psr = (val & 0xf) << 20; - CC_OP = CC_OP_FLAGS; + env->cc_N = deposit64(-(val & 0x08), 32, 32, -(val & 0x80)); + env->cc_V = deposit64(-(val & 0x02), 32, 32, -(val & 0x20)); + env->icc_C = (uint64_t)val << 32; + env->xcc_C = (val >> 4) & 1; + env->icc_Z = ~val & 0x04; + env->xcc_Z = ~val & 0x40; } target_ulong cpu_get_cwp64(CPUSPARCState *env) diff --git a/tests/migration/guestperf/comparison.py b/tests/migration/guestperf/comparison.py index c03b3f6d7e..42cc0372d1 100644 --- a/tests/migration/guestperf/comparison.py +++ b/tests/migration/guestperf/comparison.py @@ -135,4 +135,27 @@ COMPARISONS = [ Scenario("compr-multifd-channels-64", multifd=True, multifd_channels=64), ]), + + # Looking at effect of dirty-limit with + # varying x_vcpu_dirty_limit_period + Comparison("compr-dirty-limit-period", scenarios = [ + Scenario("compr-dirty-limit-period-500", + dirty_limit=True, x_vcpu_dirty_limit_period=500), + Scenario("compr-dirty-limit-period-800", + dirty_limit=True, x_vcpu_dirty_limit_period=800), + Scenario("compr-dirty-limit-period-1000", + dirty_limit=True, x_vcpu_dirty_limit_period=1000), + ]), + + + # Looking at effect of dirty-limit with + # varying vcpu_dirty_limit + Comparison("compr-dirty-limit", scenarios = [ + Scenario("compr-dirty-limit-10MB", + dirty_limit=True, vcpu_dirty_limit=10), + Scenario("compr-dirty-limit-20MB", + dirty_limit=True, vcpu_dirty_limit=20), + Scenario("compr-dirty-limit-50MB", + dirty_limit=True, vcpu_dirty_limit=50), + ]), ] diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index da96ca034a..608d7270f6 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -102,6 +102,8 @@ class Engine(object): info.get("expected-downtime", 0), info.get("setup-time", 0), info.get("cpu-throttle-percentage", 0), + info.get("dirty-limit-throttle-time-per-round", 0), + info.get("dirty-limit-ring-full-time", 0), ) def _migrate(self, hardware, scenario, src, dst, connect_uri): @@ -203,6 +205,21 @@ class Engine(object): resp = dst.cmd("migrate-set-parameters", multifd_channels=scenario._multifd_channels) + if scenario._dirty_limit: + if not hardware._dirty_ring_size: + raise Exception("dirty ring size must be configured when " + "testing dirty limit migration") + + resp = src.cmd("migrate-set-capabilities", + capabilities = [ + { "capability": "dirty-limit", + "state": True } + ]) + resp = src.cmd("migrate-set-parameters", + x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period) + resp = src.cmd("migrate-set-parameters", + vcpu_dirty_limit=scenario._vcpu_dirty_limit) + resp = src.cmd("migrate", uri=connect_uri) post_copy = False @@ -325,7 +342,6 @@ class Engine(object): cmdline = "'" + cmdline + "'" argv = [ - "-accel", "kvm", "-cpu", "host", "-kernel", self._kernel, 
"-initrd", self._initrd, @@ -333,6 +349,11 @@ class Engine(object): "-m", str((hardware._mem * 1024) + 512), "-smp", str(hardware._cpus), ] + if hardware._dirty_ring_size: + argv.extend(["-accel", "kvm,dirty-ring-size=%s" % + hardware._dirty_ring_size]) + else: + argv.extend(["-accel", "kvm"]) argv.extend(self._get_qemu_serial_args()) diff --git a/tests/migration/guestperf/hardware.py b/tests/migration/guestperf/hardware.py index 3145785ffd..f779cc050b 100644 --- a/tests/migration/guestperf/hardware.py +++ b/tests/migration/guestperf/hardware.py @@ -23,7 +23,8 @@ class Hardware(object): src_cpu_bind=None, src_mem_bind=None, dst_cpu_bind=None, dst_mem_bind=None, prealloc_pages = False, - huge_pages=False, locked_pages=False): + huge_pages=False, locked_pages=False, + dirty_ring_size=0): self._cpus = cpus self._mem = mem # GiB self._src_mem_bind = src_mem_bind # List of NUMA nodes @@ -33,6 +34,7 @@ class Hardware(object): self._prealloc_pages = prealloc_pages self._huge_pages = huge_pages self._locked_pages = locked_pages + self._dirty_ring_size = dirty_ring_size def serialize(self): @@ -46,6 +48,7 @@ class Hardware(object): "prealloc_pages": self._prealloc_pages, "huge_pages": self._huge_pages, "locked_pages": self._locked_pages, + "dirty_ring_size": self._dirty_ring_size, } @classmethod @@ -59,4 +62,5 @@ class Hardware(object): data["dst_mem_bind"], data["prealloc_pages"], data["huge_pages"], - data["locked_pages"]) + data["locked_pages"], + data["dirty_ring_size"]) diff --git a/tests/migration/guestperf/progress.py b/tests/migration/guestperf/progress.py index ab1ee57273..d490584217 100644 --- a/tests/migration/guestperf/progress.py +++ b/tests/migration/guestperf/progress.py @@ -81,7 +81,9 @@ class Progress(object): downtime, downtime_expected, setup_time, - throttle_pcent): + throttle_pcent, + dirty_limit_throttle_time_per_round, + dirty_limit_ring_full_time): self._status = status self._ram = ram @@ -91,6 +93,10 @@ class Progress(object): self._downtime_expected = downtime_expected self._setup_time = setup_time self._throttle_pcent = throttle_pcent + self._dirty_limit_throttle_time_per_round = \ + dirty_limit_throttle_time_per_round + self._dirty_limit_ring_full_time = \ + dirty_limit_ring_full_time def serialize(self): return { @@ -102,6 +108,10 @@ class Progress(object): "downtime_expected": self._downtime_expected, "setup_time": self._setup_time, "throttle_pcent": self._throttle_pcent, + "dirty_limit_throttle_time_per_round": + self._dirty_limit_throttle_time_per_round, + "dirty_limit_ring_full_time": + self._dirty_limit_ring_full_time, } @classmethod @@ -114,4 +124,6 @@ class Progress(object): data["downtime"], data["downtime_expected"], data["setup_time"], - data["throttle_pcent"]) + data["throttle_pcent"], + data["dirty_limit_throttle_time_per_round"], + data["dirty_limit_ring_full_time"]) diff --git a/tests/migration/guestperf/scenario.py b/tests/migration/guestperf/scenario.py index de70d9b2f5..154c4f5d5f 100644 --- a/tests/migration/guestperf/scenario.py +++ b/tests/migration/guestperf/scenario.py @@ -30,7 +30,9 @@ class Scenario(object): auto_converge=False, auto_converge_step=10, compression_mt=False, compression_mt_threads=1, compression_xbzrle=False, compression_xbzrle_cache=10, - multifd=False, multifd_channels=2): + multifd=False, multifd_channels=2, + dirty_limit=False, x_vcpu_dirty_limit_period=500, + vcpu_dirty_limit=1): self._name = name @@ -60,6 +62,10 @@ class Scenario(object): self._multifd = multifd self._multifd_channels = multifd_channels + self._dirty_limit = 
dirty_limit + self._x_vcpu_dirty_limit_period = x_vcpu_dirty_limit_period + self._vcpu_dirty_limit = vcpu_dirty_limit + def serialize(self): return { "name": self._name, @@ -79,6 +85,9 @@ class Scenario(object): "compression_xbzrle_cache": self._compression_xbzrle_cache, "multifd": self._multifd, "multifd_channels": self._multifd_channels, + "dirty_limit": self._dirty_limit, + "x_vcpu_dirty_limit_period": self._x_vcpu_dirty_limit_period, + "vcpu_dirty_limit": self._vcpu_dirty_limit, } @classmethod diff --git a/tests/migration/guestperf/shell.py b/tests/migration/guestperf/shell.py index 8a809e3dda..c85d89efec 100644 --- a/tests/migration/guestperf/shell.py +++ b/tests/migration/guestperf/shell.py @@ -60,6 +60,8 @@ class BaseShell(object): parser.add_argument("--prealloc-pages", dest="prealloc_pages", default=False) parser.add_argument("--huge-pages", dest="huge_pages", default=False) parser.add_argument("--locked-pages", dest="locked_pages", default=False) + parser.add_argument("--dirty-ring-size", dest="dirty_ring_size", + default=0, type=int) self._parser = parser @@ -89,7 +91,9 @@ class BaseShell(object): locked_pages=args.locked_pages, huge_pages=args.huge_pages, - prealloc_pages=args.prealloc_pages) + prealloc_pages=args.prealloc_pages, + + dirty_ring_size=args.dirty_ring_size) class Shell(BaseShell): @@ -127,6 +131,17 @@ class Shell(BaseShell): parser.add_argument("--multifd-channels", dest="multifd_channels", default=2, type=int) + parser.add_argument("--dirty-limit", dest="dirty_limit", default=False, + action="store_true") + + parser.add_argument("--x-vcpu-dirty-limit-period", + dest="x_vcpu_dirty_limit_period", + default=500, type=int) + + parser.add_argument("--vcpu-dirty-limit", + dest="vcpu_dirty_limit", + default=1, type=int) + def get_scenario(self, args): return Scenario(name="perfreport", downtime=args.downtime, @@ -150,7 +165,12 @@ class Shell(BaseShell): compression_xbzrle_cache=args.compression_xbzrle_cache, multifd=args.multifd, - multifd_channels=args.multifd_channels) + multifd_channels=args.multifd_channels, + + dirty_limit=args.dirty_limit, + x_vcpu_dirty_limit_period=\ + args.x_vcpu_dirty_limit_period, + vcpu_dirty_limit=args.vcpu_dirty_limit) def run(self, argv): args = self._parser.parse_args(argv) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e803b46039..5752412b64 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -3091,6 +3091,166 @@ static void test_vcpu_dirty_limit(void) dirtylimit_stop_vm(vm); } +static void migrate_dirty_limit_wait_showup(QTestState *from, + const int64_t period, + const int64_t value) +{ + /* Enable dirty limit capability */ + migrate_set_capability(from, "dirty-limit", true); + + /* Set dirty limit parameters */ + migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); + migrate_set_parameter_int(from, "vcpu-dirty-limit", value); + + /* Make sure migrate can't converge */ + migrate_ensure_non_converge(from); + + /* To check limit rate after precopy */ + migrate_set_capability(from, "pause-before-switchover", true); + + /* Wait for the serial output from the source */ + wait_for_serial("src_serial"); +} + +/* + * This test does: + * source destination + * start vm + * start incoming vm + * migrate + * wait dirty limit to begin + * cancel migrate + * cancellation check + * restart incoming vm + * migrate + * wait dirty limit to begin + * wait pre-switchover event + * convergence condition check + * + * And see if dirty limit migration works correctly. 
+ * This test case involves many passes, so it runs in slow mode only. + */ +static void test_migrate_dirty_limit(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *from, *to; + int64_t remaining; + uint64_t throttle_us_per_full; + /* + * We want the test to be stable and as fast as possible. + * E.g., with 1Gb/s bandwidth the migration may pass without the dirty limit, + * so we need to decrease the bandwidth. + */ + const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; + const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ + const int64_t downtime_limit = 250; /* 250ms */ + /* + * We migrate through unix-socket (> 500Mb/s). + * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). + * So, we can predict the expected_threshold. + */ + const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; + int max_try_count = 10; + MigrateCommon args = { + .start = { + .hide_stderr = true, + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + /* Start src, dst vm */ + if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) { + return; + } + + /* Prepare for dirty limit migration and wait for the src vm to show up */ + migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); + + /* Start the migration */ + migrate_qmp(from, uri, "{}"); + + /* Wait for the dirty limit throttle to begin */ + throttle_us_per_full = 0; + while (throttle_us_per_full == 0) { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } + + /* Now cancel the migration and wait for the dirty limit throttle to switch off */ + migrate_cancel(from); + wait_for_migration_status(from, "cancelled", NULL); + + /* Check if the dirty limit throttle has switched off, with a ~1ms timeout */ + do { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } while (throttle_us_per_full != 0 && --max_try_count); + + /* Assert that the dirty limit is not in service */ + g_assert_cmpint(throttle_us_per_full, ==, 0); + + args = (MigrateCommon) { + .start = { + .only_target = true, + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + /* Restart the dst vm; the src vm has already shown up, so we needn't wait anymore */ + if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) { + return; + } + + /* Start the migration */ + migrate_qmp(from, uri, "{}"); + + /* Wait for the dirty limit throttle to begin */ + throttle_us_per_full = 0; + while (throttle_us_per_full == 0) { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } + + /* + * The dirty limit rate should equal the return value of + * query-vcpu-dirty-limit if the dirty-limit capability is set + */ + g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); + + /* Now that we have tested that the dirty limit works, let the migration converge */ + migrate_set_parameter_int(from, "downtime-limit", downtime_limit); + migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); + + /* + * Wait for the pre-switchover status to check whether the migration + * satisfies the convergence condition + */ + wait_for_migration_status(from, "pre-switchover", NULL); + + remaining = read_ram_property_int(from, "remaining"); + g_assert_cmpint(remaining, <, + (expected_threshold + expected_threshold / 100)); + + migrate_continue(from, "pre-switchover"); + + qtest_qmp_eventwait(to, "RESUME"); + +
wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + test_migrate_end(from, to, true); +} + static bool kvm_dirty_ring_supported(void) { #if defined(__linux__) && defined(HOST_X86_64) @@ -3301,6 +3461,10 @@ int main(int argc, char **argv) */ if (g_test_slow()) { qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); + if (g_str_equal(arch, "x86_64") && + has_kvm && kvm_dirty_ring_supported()) { + qtest_add_func("/migration/dirty_limit", test_migrate_dirty_limit); + } } qtest_add_func("/migration/multifd/tcp/plain/none", test_multifd_tcp_none); diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index 73a670e8fa..2c15f60958 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -45,6 +45,7 @@ static int query_error_class(const char *cmd) { "query-acpi-ospm-status", ERROR_CLASS_GENERIC_ERROR }, { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE }, { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR }, + { "query-hv-balloon-status-report", ERROR_CLASS_GENERIC_ERROR }, { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR }, /* Only valid with a USB bus added */ { "x-query-usb", ERROR_CLASS_GENERIC_ERROR }, |