156 files changed, 10615 insertions, 1203 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 317eff6cec..5a56b6e848 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -266,7 +266,8 @@ S: Supported F: target/riscv/ F: hw/riscv/ F: include/hw/riscv/ -F: disas/riscv.c +F: linux-user/host/riscv32/ +F: linux-user/host/riscv64/ S390 M: Richard Henderson <rth@twiddle.net> @@ -1011,6 +1012,14 @@ F: tests/libqos/*spapr* F: tests/rtas* F: tests/libqos/rtas* +XIVE +M: David Gibson <david@gibson.dropbear.id.au> +M: Cédric Le Goater <clg@kaod.org> +L: qemu-ppc@nongnu.org +S: Supported +F: hw/*/*xive* +F: include/hw/*/*xive* + virtex_ml507 M: Edgar E. Iglesias <edgar.iglesias@gmail.com> L: qemu-ppc@nongnu.org @@ -1262,7 +1271,7 @@ M: Michael S. Tsirkin <mst@redhat.com> M: Igor Mammedov <imammedo@redhat.com> S: Supported F: include/hw/acpi/* -F: include/hw/smbios/* +F: include/hw/firmware/smbios.h F: hw/mem/* F: hw/acpi/* F: hw/smbios/* @@ -2164,6 +2173,15 @@ S: Odd Fixes F: tcg/ppc/ F: disas/ppc.c +RISC-V +M: Michael Clark <mjc@sifive.com> +M: Palmer Dabbelt <palmer@sifive.com> +M: Alistair Francis <Alistair.Francis@wdc.com> +L: qemu-riscv@nongnu.org +S: Maintained +F: tcg/riscv/ +F: disas/riscv.c + S390 target M: Richard Henderson <rth@twiddle.net> S: Maintained @@ -2404,6 +2422,8 @@ S: Maintained F: hw/rdma/* F: hw/rdma/vmw/* F: docs/pvrdma.txt +F: contrib/rdmacm-mux/* +F: qapi/rdma.json Build and test automation ------------------------- diff --git a/Makefile b/Makefile index c8b9efdad4..dd53965f77 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,7 @@ CONFIG_ALL=y -include config-all-devices.mak -include config-all-disas.mak -config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios +config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION @echo $@ is out-of-date, running configure @# TODO: The next lines include code which supports a smooth @# transition from old configurations without config.status. 
@@ -362,6 +362,7 @@ dummy := $(call unnest-vars,, \ elf2dmp-obj-y \ ivshmem-client-obj-y \ ivshmem-server-obj-y \ + rdmacm-mux-obj-y \ libvhost-user-obj-y \ vhost-user-scsi-obj-y \ vhost-user-blk-obj-y \ @@ -579,6 +580,8 @@ vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a $(call LINK, $^) vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a $(call LINK, $^) +rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak $(call quiet-command,$(PYTHON) $< $@ \ diff --git a/Makefile.objs b/Makefile.objs index 56af0347d3..bc5b8a8442 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -1,5 +1,6 @@ QAPI_MODULES = block-core block char common crypto introspect job migration -QAPI_MODULES += misc net rocker run-state sockets tpm trace transaction ui +QAPI_MODULES += misc net rdma rocker run-state sockets tpm trace transaction +QAPI_MODULES += ui ####################################################################### # Common libraries for tools and emulators @@ -133,6 +134,7 @@ vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS) vhost-user-scsi.o-libs := $(LIBISCSI_LIBS) vhost-user-scsi-obj-y = contrib/vhost-user-scsi/ vhost-user-blk-obj-y = contrib/vhost-user-blk/ +rdmacm-mux-obj-y = contrib/rdmacm-mux/ ###################################################################### trace-events-subdirs = diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index cd75829cf2..941295ea49 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -571,6 +571,81 @@ int cpu_signal_handler(int host_signum, void *pinfo, return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } +#elif defined(__riscv) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + ucontext_t *uc = puc; + greg_t pc = uc->uc_mcontext.__gregs[REG_PC]; + uint32_t insn = *(uint32_t *)pc; + int is_write = 0; + + /* Detect store by reading the instruction at the program + counter. Note: we currently only generate 32-bit + instructions so we thus only detect 32-bit stores */ + switch (((insn >> 0) & 0b11)) { + case 3: + switch (((insn >> 2) & 0b11111)) { + case 8: + switch (((insn >> 12) & 0b111)) { + case 0: /* sb */ + case 1: /* sh */ + case 2: /* sw */ + case 3: /* sd */ + case 4: /* sq */ + is_write = 1; + break; + default: + break; + } + break; + case 9: + switch (((insn >> 12) & 0b111)) { + case 2: /* fsw */ + case 3: /* fsd */ + case 4: /* fsq */ + is_write = 1; + break; + default: + break; + } + break; + default: + break; + } + } + + /* Check for compressed instructions */ + switch (((insn >> 13) & 0b111)) { + case 7: + switch (insn & 0b11) { + case 0: /*c.sd */ + case 2: /* c.sdsp */ + is_write = 1; + break; + default: + break; + } + break; + case 6: + switch (insn & 0b11) { + case 0: /* c.sw */ + case 3: /* c.swsp */ + is_write = 1; + break; + default: + break; + } + break; + default: + break; + } + + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); +} + #else #error host CPU specific signal handler needed diff --git a/configure b/configure index 224d3071ac..79375affc1 100755 --- a/configure +++ b/configure @@ -710,6 +710,12 @@ elif check_define __s390__ ; then else cpu="s390" fi +elif check_define __riscv ; then + if check_define _LP64 ; then + cpu="riscv64" + else + cpu="riscv32" + fi elif check_define __arm__ ; then cpu="arm" elif check_define __aarch64__ ; then @@ -722,7 +728,7 @@ ARCH= # Normalise host CPU name and set ARCH. 
# Note that this case should only have supported host CPUs, not guests. case "$cpu" in - ppc|ppc64|s390|s390x|sparc64|x32) + ppc|ppc64|s390|s390x|sparc64|x32|riscv32|riscv64) cpu="$cpu" supported_cpu="yes" eval "cross_cc_${cpu}=\$host_cc" @@ -6937,6 +6943,8 @@ elif test "$ARCH" = "x86_64" -o "$ARCH" = "x32" ; then QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/i386 $QEMU_INCLUDES" elif test "$ARCH" = "ppc64" ; then QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/ppc $QEMU_INCLUDES" +elif test "$ARCH" = "riscv32" -o "$ARCH" = "riscv64" ; then + QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/riscv $QEMU_INCLUDES" else QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/\$(ARCH) $QEMU_INCLUDES" fi @@ -7433,7 +7441,7 @@ for i in $ARCH $TARGET_BASE_ARCH ; do ppc*) disas_config "PPC" ;; - riscv) + riscv*) disas_config "RISCV" ;; s390*) diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h index 4351a2dd61..a3a3cac2c1 100644 --- a/contrib/elf2dmp/pdb.h +++ b/contrib/elf2dmp/pdb.h @@ -8,8 +8,6 @@ #ifndef PDB_H #define PDB_H -#include <stdint.h> -#include <stdlib.h> typedef struct GUID { unsigned int Data1; diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h index 374e06a9c5..dafb26afbb 100644 --- a/contrib/elf2dmp/pe.h +++ b/contrib/elf2dmp/pe.h @@ -8,7 +8,6 @@ #ifndef PE_H #define PE_H -#include <stdint.h> typedef struct IMAGE_DOS_HEADER { uint16_t e_magic; /* 0x00: MZ Header signature */ diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h index d85d6558fa..86e6e688fb 100644 --- a/contrib/elf2dmp/qemu_elf.h +++ b/contrib/elf2dmp/qemu_elf.h @@ -8,7 +8,6 @@ #ifndef QEMU_ELF_H #define QEMU_ELF_H -#include <stdint.h> #include <elf.h> typedef struct QEMUCPUSegment { diff --git a/contrib/rdmacm-mux/Makefile.objs b/contrib/rdmacm-mux/Makefile.objs new file mode 100644 index 0000000000..be3eacb6f7 --- /dev/null +++ b/contrib/rdmacm-mux/Makefile.objs @@ -0,0 +1,4 @@ +ifdef CONFIG_PVRDMA +CFLAGS += -libumad -Wno-format-truncation +rdmacm-mux-obj-y = main.o +endif diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c new file mode 100644 index 0000000000..835a7f9214 --- /dev/null +++ b/contrib/rdmacm-mux/main.c @@ -0,0 +1,798 @@ +/* + * QEMU paravirtual RDMA - rdmacm-mux implementation + * + * Copyright (C) 2018 Oracle + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Yuval Shaia <yuval.shaia@oracle.com> + * Marcel Apfelbaum <marcel@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "sys/poll.h" +#include "sys/ioctl.h" +#include "pthread.h" +#include "syslog.h" + +#include "infiniband/verbs.h" +#include "infiniband/umad.h" +#include "infiniband/umad_types.h" +#include "infiniband/umad_sa.h" +#include "infiniband/umad_cm.h" + +#include "rdmacm-mux.h" + +#define SCALE_US 1000 +#define COMMID_TTL 2 /* How many SCALE_US a context of MAD session is saved */ +#define SLEEP_SECS 5 /* This is used both in poll() and thread */ +#define SERVER_LISTEN_BACKLOG 10 +#define MAX_CLIENTS 4096 +#define MAD_RMPP_VERSION 0 +#define MAD_METHOD_MASK0 0x8 + +#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof(long))) + +#define CM_REQ_DGID_POS 80 +#define CM_SIDR_REQ_DGID_POS 44 + +/* The below can be override by command line parameter */ +#define UNIX_SOCKET_PATH "/var/run/rdmacm-mux" +#define RDMA_PORT_NUM 1 + +typedef struct RdmaCmServerArgs { + char unix_socket_path[PATH_MAX]; + char rdma_dev_name[NAME_MAX]; + int rdma_port_num; +} RdmaCMServerArgs; + +typedef struct CommId2FdEntry { + int fd; + int ttl; /* Initialized to 2, decrement each timeout, entry delete when 0 */ + __be64 gid_ifid; +} CommId2FdEntry; + +typedef struct RdmaCmUMadAgent { + int port_id; + int agent_id; + GHashTable *gid2fd; /* Used to find fd of a given gid */ + GHashTable *commid2fd; /* Used to find fd on of a given comm_id */ +} RdmaCmUMadAgent; + +typedef struct RdmaCmServer { + bool run; + RdmaCMServerArgs args; + struct pollfd fds[MAX_CLIENTS]; + int nfds; + RdmaCmUMadAgent umad_agent; + pthread_t umad_recv_thread; + pthread_rwlock_t lock; +} RdmaCMServer; + +static RdmaCMServer server = {0}; + +static void usage(const char *progname) +{ + printf("Usage: %s [OPTION]...\n" + "Start a RDMA-CM multiplexer\n" + "\n" + "\t-h Show this help\n" + "\t-d rdma-device-name Name of RDMA device to register with\n" + "\t-s unix-socket-path Path to unix socket to listen on (default %s)\n" + "\t-p rdma-device-port Port number of RDMA device to register with (default %d)\n", + progname, UNIX_SOCKET_PATH, RDMA_PORT_NUM); +} + +static void help(const char *progname) +{ + fprintf(stderr, "Try '%s -h' for more information.\n", progname); +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + char unix_socket_path[PATH_MAX]; + + strcpy(server.args.rdma_dev_name, ""); + strcpy(unix_socket_path, UNIX_SOCKET_PATH); + server.args.rdma_port_num = RDMA_PORT_NUM; + + while ((c = getopt(argc, argv, "hs:d:p:")) != -1) { + switch (c) { + case 'h': + usage(argv[0]); + exit(0); + + case 'd': + strncpy(server.args.rdma_dev_name, optarg, NAME_MAX - 1); + break; + + case 's': + /* This is temporary, final name will build below */ + strncpy(unix_socket_path, optarg, PATH_MAX); + break; + + case 'p': + server.args.rdma_port_num = atoi(optarg); + break; + + default: + help(argv[0]); + exit(1); + } + } + + if (!strcmp(server.args.rdma_dev_name, "")) { + fprintf(stderr, "Missing RDMA device name\n"); + help(argv[0]); + exit(1); + } + + /* Build unique unix-socket file name */ + snprintf(server.args.unix_socket_path, PATH_MAX, "%s-%s-%d", + unix_socket_path, server.args.rdma_dev_name, + server.args.rdma_port_num); + + syslog(LOG_INFO, "unix_socket_path=%s", server.args.unix_socket_path); + syslog(LOG_INFO, "rdma-device-name=%s", server.args.rdma_dev_name); + syslog(LOG_INFO, "rdma-device-port=%d", server.args.rdma_port_num); +} + +static void hash_tbl_alloc(void) +{ + + server.umad_agent.gid2fd = g_hash_table_new_full(g_int64_hash, + g_int64_equal, + g_free, g_free); + 
server.umad_agent.commid2fd = g_hash_table_new_full(g_int_hash, + g_int_equal, + g_free, g_free); +} + +static void hash_tbl_free(void) +{ + if (server.umad_agent.commid2fd) { + g_hash_table_destroy(server.umad_agent.commid2fd); + } + if (server.umad_agent.gid2fd) { + g_hash_table_destroy(server.umad_agent.gid2fd); + } +} + + +static int _hash_tbl_search_fd_by_ifid(__be64 *gid_ifid) +{ + int *fd; + + fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid); + if (!fd) { + /* Let's try IPv4 */ + *gid_ifid |= 0x00000000ffff0000; + fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid); + } + + return fd ? *fd : 0; +} + +static int hash_tbl_search_fd_by_ifid(int *fd, __be64 *gid_ifid) +{ + pthread_rwlock_rdlock(&server.lock); + *fd = _hash_tbl_search_fd_by_ifid(gid_ifid); + pthread_rwlock_unlock(&server.lock); + + if (!fd) { + syslog(LOG_WARNING, "Can't find matching for ifid 0x%llx\n", *gid_ifid); + return -ENOENT; + } + + return 0; +} + +static int hash_tbl_search_fd_by_comm_id(uint32_t comm_id, int *fd, + __be64 *gid_idid) +{ + CommId2FdEntry *fde; + + pthread_rwlock_rdlock(&server.lock); + fde = g_hash_table_lookup(server.umad_agent.commid2fd, &comm_id); + pthread_rwlock_unlock(&server.lock); + + if (!fde) { + syslog(LOG_WARNING, "Can't find matching for comm_id 0x%x\n", comm_id); + return -ENOENT; + } + + *fd = fde->fd; + *gid_idid = fde->gid_ifid; + + return 0; +} + +static RdmaCmMuxErrCode add_fd_ifid_pair(int fd, __be64 gid_ifid) +{ + int fd1; + + pthread_rwlock_wrlock(&server.lock); + + fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid); + if (fd1) { /* record already exist - an error */ + pthread_rwlock_unlock(&server.lock); + return fd == fd1 ? RDMACM_MUX_ERR_CODE_EEXIST : + RDMACM_MUX_ERR_CODE_EACCES; + } + + g_hash_table_insert(server.umad_agent.gid2fd, g_memdup(&gid_ifid, + sizeof(gid_ifid)), g_memdup(&fd, sizeof(fd))); + + pthread_rwlock_unlock(&server.lock); + + syslog(LOG_INFO, "0x%lx registered on socket %d", + be64toh((uint64_t)gid_ifid), fd); + + return RDMACM_MUX_ERR_CODE_OK; +} + +static RdmaCmMuxErrCode delete_fd_ifid_pair(int fd, __be64 gid_ifid) +{ + int fd1; + + pthread_rwlock_wrlock(&server.lock); + + fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid); + if (!fd1) { /* record not exist - an error */ + pthread_rwlock_unlock(&server.lock); + return RDMACM_MUX_ERR_CODE_ENOTFOUND; + } + + g_hash_table_remove(server.umad_agent.gid2fd, g_memdup(&gid_ifid, + sizeof(gid_ifid))); + pthread_rwlock_unlock(&server.lock); + + syslog(LOG_INFO, "0x%lx unregistered on socket %d", + be64toh((uint64_t)gid_ifid), fd); + + return RDMACM_MUX_ERR_CODE_OK; +} + +static void hash_tbl_save_fd_comm_id_pair(int fd, uint32_t comm_id, + uint64_t gid_ifid) +{ + CommId2FdEntry fde = {fd, COMMID_TTL, gid_ifid}; + + pthread_rwlock_wrlock(&server.lock); + g_hash_table_insert(server.umad_agent.commid2fd, + g_memdup(&comm_id, sizeof(comm_id)), + g_memdup(&fde, sizeof(fde))); + pthread_rwlock_unlock(&server.lock); +} + +static gboolean remove_old_comm_ids(gpointer key, gpointer value, + gpointer user_data) +{ + CommId2FdEntry *fde = (CommId2FdEntry *)value; + + return !fde->ttl--; +} + +static gboolean remove_entry_from_gid2fd(gpointer key, gpointer value, + gpointer user_data) +{ + if (*(int *)value == *(int *)user_data) { + syslog(LOG_INFO, "0x%lx unregistered on socket %d", + be64toh(*(uint64_t *)key), *(int *)value); + return true; + } + + return false; +} + +static void hash_tbl_remove_fd_ifid_pair(int fd) +{ + pthread_rwlock_wrlock(&server.lock); + 
g_hash_table_foreach_remove(server.umad_agent.gid2fd, + remove_entry_from_gid2fd, (gpointer)&fd); + pthread_rwlock_unlock(&server.lock); +} + +static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) +{ + struct umad_hdr *hdr = (struct umad_hdr *)mad; + char *data = (char *)hdr + sizeof(*hdr); + int32_t comm_id = 0; + uint16_t attr_id = be16toh(hdr->attr_id); + int rc = 0; + + switch (attr_id) { + case UMAD_CM_ATTR_REQ: + memcpy(gid_ifid, data + CM_REQ_DGID_POS, sizeof(*gid_ifid)); + rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid); + break; + + case UMAD_CM_ATTR_SIDR_REQ: + memcpy(gid_ifid, data + CM_SIDR_REQ_DGID_POS, sizeof(*gid_ifid)); + rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid); + break; + + case UMAD_CM_ATTR_REP: + /* Fall through */ + case UMAD_CM_ATTR_REJ: + /* Fall through */ + case UMAD_CM_ATTR_DREQ: + /* Fall through */ + case UMAD_CM_ATTR_DREP: + /* Fall through */ + case UMAD_CM_ATTR_RTU: + data += sizeof(comm_id); + /* Fall through */ + case UMAD_CM_ATTR_SIDR_REP: + memcpy(&comm_id, data, sizeof(comm_id)); + if (comm_id) { + rc = hash_tbl_search_fd_by_comm_id(comm_id, fd, gid_ifid); + } + break; + + default: + rc = -EINVAL; + syslog(LOG_WARNING, "Unsupported attr_id 0x%x\n", attr_id); + } + + syslog(LOG_DEBUG, "mad_to_vm: %d 0x%x 0x%x\n", *fd, attr_id, comm_id); + + return rc; +} + +static void *umad_recv_thread_func(void *args) +{ + int rc; + RdmaCmMuxMsg msg = {0}; + int fd = -2; + + msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ; + msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD; + + while (server.run) { + do { + msg.umad_len = sizeof(msg.umad.mad); + rc = umad_recv(server.umad_agent.port_id, &msg.umad, &msg.umad_len, + SLEEP_SECS * SCALE_US); + if ((rc == -EIO) || (rc == -EINVAL)) { + syslog(LOG_CRIT, "Fatal error while trying to read MAD"); + } + + if (rc == -ETIMEDOUT) { + g_hash_table_foreach_remove(server.umad_agent.commid2fd, + remove_old_comm_ids, NULL); + } + } while (rc && server.run); + + if (server.run) { + rc = get_fd(msg.umad.mad, &fd, &msg.hdr.sgid.global.interface_id); + if (rc) { + continue; + } + + send(fd, &msg, sizeof(msg), 0); + } + } + + return NULL; +} + +static int read_and_process(int fd) +{ + int rc; + RdmaCmMuxMsg msg = {0}; + struct umad_hdr *hdr; + uint32_t *comm_id = 0; + uint16_t attr_id; + + rc = recv(fd, &msg, sizeof(msg), 0); + syslog(LOG_DEBUG, "Socket %d, recv %d\n", fd, rc); + + if (rc < 0 && errno != EWOULDBLOCK) { + syslog(LOG_ERR, "Fail to read from socket %d\n", fd); + return -EIO; + } + + if (!rc) { + syslog(LOG_ERR, "Fail to read from socket %d\n", fd); + return -EPIPE; + } + + if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ) { + syslog(LOG_WARNING, "Got non-request message (%d) from socket %d\n", + msg.hdr.msg_type, fd); + return -EPERM; + } + + switch (msg.hdr.op_code) { + case RDMACM_MUX_OP_CODE_REG: + rc = add_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id); + break; + + case RDMACM_MUX_OP_CODE_UNREG: + rc = delete_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id); + break; + + case RDMACM_MUX_OP_CODE_MAD: + /* If this is REQ or REP then store the pair comm_id,fd to be later + * used for other messages where gid is unknown */ + hdr = (struct umad_hdr *)msg.umad.mad; + attr_id = be16toh(hdr->attr_id); + if ((attr_id == UMAD_CM_ATTR_REQ) || (attr_id == UMAD_CM_ATTR_DREQ) || + (attr_id == UMAD_CM_ATTR_SIDR_REQ) || + (attr_id == UMAD_CM_ATTR_REP) || (attr_id == UMAD_CM_ATTR_DREP)) { + comm_id = (uint32_t *)(msg.umad.mad + sizeof(*hdr)); + hash_tbl_save_fd_comm_id_pair(fd, *comm_id, + msg.hdr.sgid.global.interface_id); + } + + 
syslog(LOG_DEBUG, "vm_to_mad: %d 0x%x 0x%x\n", fd, attr_id, + comm_id ? *comm_id : 0); + rc = umad_send(server.umad_agent.port_id, server.umad_agent.agent_id, + &msg.umad, msg.umad_len, 1, 0); + if (rc) { + syslog(LOG_ERR, + "Fail to send MAD message (0x%x) from socket %d, err=%d", + attr_id, fd, rc); + } + break; + + default: + syslog(LOG_ERR, "Got invalid op_code (%d) from socket %d", + msg.hdr.msg_type, fd); + rc = RDMACM_MUX_ERR_CODE_EINVAL; + } + + msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_RESP; + msg.hdr.err_code = rc; + rc = send(fd, &msg, sizeof(msg), 0); + + return rc == sizeof(msg) ? 0 : -EPIPE; +} + +static int accept_all(void) +{ + int fd, rc = 0;; + + pthread_rwlock_wrlock(&server.lock); + + do { + if ((server.nfds + 1) > MAX_CLIENTS) { + syslog(LOG_WARNING, "Too many clients (%d)", server.nfds); + rc = -EIO; + goto out; + } + + fd = accept(server.fds[0].fd, NULL, NULL); + if (fd < 0) { + if (errno != EWOULDBLOCK) { + syslog(LOG_WARNING, "accept() failed"); + rc = -EIO; + goto out; + } + break; + } + + syslog(LOG_INFO, "Client connected on socket %d\n", fd); + server.fds[server.nfds].fd = fd; + server.fds[server.nfds].events = POLLIN; + server.nfds++; + } while (fd != -1); + +out: + pthread_rwlock_unlock(&server.lock); + return rc; +} + +static void compress_fds(void) +{ + int i, j; + int closed = 0; + + pthread_rwlock_wrlock(&server.lock); + + for (i = 1; i < server.nfds; i++) { + if (!server.fds[i].fd) { + closed++; + for (j = i; j < server.nfds - 1; j++) { + server.fds[j] = server.fds[j + 1]; + } + } + } + + server.nfds -= closed; + + pthread_rwlock_unlock(&server.lock); +} + +static void close_fd(int idx) +{ + close(server.fds[idx].fd); + syslog(LOG_INFO, "Socket %d closed\n", server.fds[idx].fd); + hash_tbl_remove_fd_ifid_pair(server.fds[idx].fd); + server.fds[idx].fd = 0; +} + +static void run(void) +{ + int rc, nfds, i; + bool compress = false; + + syslog(LOG_INFO, "Service started"); + + while (server.run) { + rc = poll(server.fds, server.nfds, SLEEP_SECS * SCALE_US); + if (rc < 0) { + if (errno != EINTR) { + syslog(LOG_WARNING, "poll() failed"); + } + continue; + } + + if (rc == 0) { + continue; + } + + nfds = server.nfds; + for (i = 0; i < nfds; i++) { + syslog(LOG_DEBUG, "pollfd[%d]: revents 0x%x, events 0x%x\n", i, + server.fds[i].revents, server.fds[i].events); + if (server.fds[i].revents == 0) { + continue; + } + + if (server.fds[i].revents != POLLIN) { + if (i == 0) { + syslog(LOG_NOTICE, "Unexpected poll() event (0x%x)\n", + server.fds[i].revents); + } else { + close_fd(i); + compress = true; + } + continue; + } + + if (i == 0) { + rc = accept_all(); + if (rc) { + continue; + } + } else { + rc = read_and_process(server.fds[i].fd); + if (rc) { + close_fd(i); + compress = true; + } + } + } + + if (compress) { + compress = false; + compress_fds(); + } + } +} + +static void fini_listener(void) +{ + int i; + + if (server.fds[0].fd <= 0) { + return; + } + + for (i = server.nfds - 1; i >= 0; i--) { + if (server.fds[i].fd) { + close(server.fds[i].fd); + } + } + + unlink(server.args.unix_socket_path); +} + +static void fini_umad(void) +{ + if (server.umad_agent.agent_id) { + umad_unregister(server.umad_agent.port_id, server.umad_agent.agent_id); + } + + if (server.umad_agent.port_id) { + umad_close_port(server.umad_agent.port_id); + } + + hash_tbl_free(); +} + +static void fini(void) +{ + if (server.umad_recv_thread) { + pthread_join(server.umad_recv_thread, NULL); + server.umad_recv_thread = 0; + } + fini_umad(); + fini_listener(); + pthread_rwlock_destroy(&server.lock); 
+ + syslog(LOG_INFO, "Service going down"); +} + +static int init_listener(void) +{ + struct sockaddr_un sun; + int rc, on = 1; + + server.fds[0].fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (server.fds[0].fd < 0) { + syslog(LOG_ALERT, "socket() failed"); + return -EIO; + } + + rc = setsockopt(server.fds[0].fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on, + sizeof(on)); + if (rc < 0) { + syslog(LOG_ALERT, "setsockopt() failed"); + rc = -EIO; + goto err; + } + + rc = ioctl(server.fds[0].fd, FIONBIO, (char *)&on); + if (rc < 0) { + syslog(LOG_ALERT, "ioctl() failed"); + rc = -EIO; + goto err; + } + + if (strlen(server.args.unix_socket_path) >= sizeof(sun.sun_path)) { + syslog(LOG_ALERT, + "Invalid unix_socket_path, size must be less than %ld\n", + sizeof(sun.sun_path)); + rc = -EINVAL; + goto err; + } + + sun.sun_family = AF_UNIX; + rc = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", + server.args.unix_socket_path); + if (rc < 0 || rc >= sizeof(sun.sun_path)) { + syslog(LOG_ALERT, "Could not copy unix socket path\n"); + rc = -EINVAL; + goto err; + } + + rc = bind(server.fds[0].fd, (struct sockaddr *)&sun, sizeof(sun)); + if (rc < 0) { + syslog(LOG_ALERT, "bind() failed"); + rc = -EIO; + goto err; + } + + rc = listen(server.fds[0].fd, SERVER_LISTEN_BACKLOG); + if (rc < 0) { + syslog(LOG_ALERT, "listen() failed"); + rc = -EIO; + goto err; + } + + server.fds[0].events = POLLIN; + server.nfds = 1; + server.run = true; + + return 0; + +err: + close(server.fds[0].fd); + return rc; +} + +static int init_umad(void) +{ + long method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; + + server.umad_agent.port_id = umad_open_port(server.args.rdma_dev_name, + server.args.rdma_port_num); + + if (server.umad_agent.port_id < 0) { + syslog(LOG_WARNING, "umad_open_port() failed"); + return -EIO; + } + + memset(&method_mask, 0, sizeof(method_mask)); + method_mask[0] = MAD_METHOD_MASK0; + server.umad_agent.agent_id = umad_register(server.umad_agent.port_id, + UMAD_CLASS_CM, + UMAD_SA_CLASS_VERSION, + MAD_RMPP_VERSION, method_mask); + if (server.umad_agent.agent_id < 0) { + syslog(LOG_WARNING, "umad_register() failed"); + return -EIO; + } + + hash_tbl_alloc(); + + return 0; +} + +static void signal_handler(int sig, siginfo_t *siginfo, void *context) +{ + static bool warned; + + /* Prevent stop if clients are connected */ + if (server.nfds != 1) { + if (!warned) { + syslog(LOG_WARNING, + "Can't stop while active client exist, resend SIGINT to overid"); + warned = true; + return; + } + } + + if (sig == SIGINT) { + server.run = false; + fini(); + } + + exit(0); +} + +static int init(void) +{ + int rc; + struct sigaction sig = {0}; + + rc = init_listener(); + if (rc) { + return rc; + } + + rc = init_umad(); + if (rc) { + return rc; + } + + pthread_rwlock_init(&server.lock, 0); + + rc = pthread_create(&server.umad_recv_thread, NULL, umad_recv_thread_func, + NULL); + if (rc) { + syslog(LOG_ERR, "Fail to create UMAD receiver thread (%d)\n", rc); + return rc; + } + + sig.sa_sigaction = &signal_handler; + sig.sa_flags = SA_SIGINFO; + rc = sigaction(SIGINT, &sig, NULL); + if (rc < 0) { + syslog(LOG_ERR, "Fail to install SIGINT handler (%d)\n", errno); + return rc; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int rc; + + memset(&server, 0, sizeof(server)); + + parse_args(argc, argv); + + rc = init(); + if (rc) { + syslog(LOG_ERR, "Fail to initialize server (%d)\n", rc); + rc = -EAGAIN; + goto out; + } + + run(); + +out: + fini(); + + return rc; +} diff --git a/contrib/rdmacm-mux/rdmacm-mux.h 
b/contrib/rdmacm-mux/rdmacm-mux.h new file mode 100644 index 0000000000..942a802c47 --- /dev/null +++ b/contrib/rdmacm-mux/rdmacm-mux.h @@ -0,0 +1,61 @@ +/* + * QEMU paravirtual RDMA - rdmacm-mux declarations + * + * Copyright (C) 2018 Oracle + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Yuval Shaia <yuval.shaia@oracle.com> + * Marcel Apfelbaum <marcel@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef RDMACM_MUX_H +#define RDMACM_MUX_H + +#include "linux/if.h" +#include "infiniband/verbs.h" +#include "infiniband/umad.h" +#include "rdma/rdma_user_cm.h" + +typedef enum RdmaCmMuxMsgType { + RDMACM_MUX_MSG_TYPE_REQ = 0, + RDMACM_MUX_MSG_TYPE_RESP = 1, +} RdmaCmMuxMsgType; + +typedef enum RdmaCmMuxOpCode { + RDMACM_MUX_OP_CODE_REG = 0, + RDMACM_MUX_OP_CODE_UNREG = 1, + RDMACM_MUX_OP_CODE_MAD = 2, +} RdmaCmMuxOpCode; + +typedef enum RdmaCmMuxErrCode { + RDMACM_MUX_ERR_CODE_OK = 0, + RDMACM_MUX_ERR_CODE_EINVAL = 1, + RDMACM_MUX_ERR_CODE_EEXIST = 2, + RDMACM_MUX_ERR_CODE_EACCES = 3, + RDMACM_MUX_ERR_CODE_ENOTFOUND = 4, +} RdmaCmMuxErrCode; + +typedef struct RdmaCmMuxHdr { + RdmaCmMuxMsgType msg_type; + RdmaCmMuxOpCode op_code; + union ibv_gid sgid; + RdmaCmMuxErrCode err_code; +} RdmaCmUHdr; + +typedef struct RdmaCmUMad { + struct ib_user_mad hdr; + char mad[RDMA_MAX_PRIVATE_DATA]; +} RdmaCmUMad; + +typedef struct RdmaCmMuxMsg { + RdmaCmUHdr hdr; + int umad_len; + RdmaCmUMad umad; +} RdmaCmMuxMsg; + +#endif diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 571f114a56..858221ad95 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -20,7 +20,6 @@ #include "contrib/libvhost-user/libvhost-user-glib.h" #include "contrib/libvhost-user/libvhost-user.h" -#include <glib.h> struct virtio_blk_inhdr { unsigned char status; diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c index 02c29019d1..496dd6e693 100644 --- a/contrib/vhost-user-scsi/vhost-user-scsi.c +++ b/contrib/vhost-user-scsi/vhost-user-scsi.c @@ -16,7 +16,6 @@ #include "contrib/libvhost-user/libvhost-user-glib.h" #include "standard-headers/linux/virtio_scsi.h" -#include <glib.h> #define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi" diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak index aec2855750..7f34ad0528 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -16,6 +16,8 @@ CONFIG_VIRTIO_VGA=y CONFIG_XICS=$(CONFIG_PSERIES) CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM)) +CONFIG_XIVE=$(CONFIG_PSERIES) +CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES) CONFIG_MEM_DEVICE=y CONFIG_DIMM=y CONFIG_SPAPR_RNG=y diff --git a/disas.c b/disas.c index f9c517b358..d9aa713a40 100644 --- a/disas.c +++ b/disas.c @@ -522,8 +522,14 @@ void disas(FILE *out, void *code, unsigned long size) # ifdef _ARCH_PPC64 s.info.cap_mode = CS_MODE_64; # endif -#elif defined(__riscv__) - print_insn = print_insn_riscv; +#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) +#if defined(_ILP32) || (__riscv_xlen == 32) + print_insn = print_insn_riscv32; +#elif defined(_LP64) + print_insn = print_insn_riscv64; +#else +#error unsupported RISC-V ABI +#endif #elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS) print_insn = print_insn_arm_a64; s.info.cap_arch = CS_ARCH_ARM64; diff --git 
a/disas/microblaze.c b/disas/microblaze.c index 598ecbc89d..c23605043a 100644 --- a/disas/microblaze.c +++ b/disas/microblaze.c @@ -176,7 +176,6 @@ enum microblaze_instr_type { #define REG_TLBSX 36869 /* MMU: TLB Search Index reg */ /* alternate names for gen purpose regs */ -#define REG_SP 1 /* stack pointer */ #define REG_ROSDP 2 /* read-only small data pointer */ #define REG_RWSDP 13 /* read-write small data pointer */ diff --git a/disas/ppc.c b/disas/ppc.c index 5ab9c35a84..da1140ba2b 100644 --- a/disas/ppc.c +++ b/disas/ppc.c @@ -3734,6 +3734,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "addmeo.", XO(31,234,1,1), XORB_MASK, PPCCOM, { RT, RA } }, { "ameo.", XO(31,234,1,1), XORB_MASK, PWRCOM, { RT, RA } }, +{ "addex", XO(31,170,0,0), XO_MASK, POWER9, { RT, RA, RB } }, + { "mullw", XO(31,235,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, { "muls", XO(31,235,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, { "mullw.", XO(31,235,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, diff --git a/docs/pvrdma.txt b/docs/pvrdma.txt index 5599318159..5175251b47 100644 --- a/docs/pvrdma.txt +++ b/docs/pvrdma.txt @@ -9,8 +9,9 @@ It works with its Linux Kernel driver AS IS, no need for any special guest modifications. While it complies with the VMware device, it can also communicate with bare -metal RDMA-enabled machines and does not require an RDMA HCA in the host, it -can work with Soft-RoCE (rxe). +metal RDMA-enabled machines as peers. + +It does not require an RDMA HCA in the host; it can work with Soft-RoCE (rxe). It does not require the whole guest RAM to be pinned allowing memory over-commit and, even if not implemented yet, migration support will be @@ -78,29 +79,116 @@ the required RDMA libraries. 3. Usage ======== + + +3.1 VM Memory settings +====================== Currently the device is working only with memory backed RAM and it must be mark as "shared": -m 1G \ -object memory-backend-ram,id=mb1,size=1G,share \ -numa node,memdev=mb1 \ -The pvrdma device is composed of two functions: - - Function 0 is a vmxnet Ethernet Device which is redundant in Guest - but is required to pass the ibdevice GID using its MAC. - Examples: - For an rxe backend using eth0 interface it will use its mac: - -device vmxnet3,addr=<slot>.0,multifunction=on,mac=<eth0 MAC> - For an SRIOV VF, we take the Ethernet Interface exposed by it: - -device vmxnet3,multifunction=on,mac=<RoCE eth MAC> - - Function 1 is the actual device: - -device pvrdma,addr=<slot>.1,backend-dev=<ibdevice>,backend-gid-idx=<gid>,backend-port=<port> - where the ibdevice can be rxe or RDMA VF (e.g. mlx5_4) - Note: Pay special attention that the GID at backend-gid-idx matches vmxnet's MAC. - The rules of conversion are part of the RoCE spec, but since manual conversion - is not required, spotting problems is not hard: - Example: GID: fe80:0000:0000:0000:7efe:90ff:fecb:743a - MAC: 7c:fe:90:cb:74:3a - Note the difference between the first byte of the MAC and the GID. + +3.2 MAD Multiplexer +=================== +MAD Multiplexer is a service that exposes a MAD-like interface for VMs in +order to overcome the limitation where only a single entity can register with +the MAD layer to send and receive RDMA-CM MAD packets. + +To build rdmacm-mux, run +# make rdmacm-mux + +The application accepts 3 command line arguments and exposes a UNIX socket +to pass control and data to it.
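Aside (editor's illustration, not part of this patch): below is a minimal sketch of how a client - for example the pvrdma device talking through its mad-chardev socket - could register a GID interface-id with the multiplexer, assuming the message layout from contrib/rdmacm-mux/rdmacm-mux.h above; the socket path and interface-id value are placeholders. The command line options accepted by the service are listed right after the sketch.

/*
 * rdmacm-mux client sketch (illustration only): connect to the mux's UNIX
 * socket and send a REG request; the mux answers with msg_type RESP and an
 * RdmaCmMuxErrCode, and keeps the connection open to forward incoming MADs.
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <endian.h>
#include <sys/socket.h>
#include <sys/un.h>
#include "rdmacm-mux.h"   /* RdmaCmMuxMsg and friends from this patch */

static int mux_register_gid(const char *sock_path, uint64_t ifid)
{
    RdmaCmMuxMsg msg = {0};
    struct sockaddr_un addr = { .sun_family = AF_UNIX };
    int fd = socket(AF_UNIX, SOCK_STREAM, 0);

    if (fd < 0) {
        return -1;
    }
    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", sock_path);
    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        close(fd);
        return -1;
    }

    /* Registration request: op-code REG, GID interface-id in network order */
    msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
    msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG;
    msg.hdr.sgid.global.interface_id = htobe64(ifid);

    if (send(fd, &msg, sizeof(msg), 0) != sizeof(msg) ||
        recv(fd, &msg, sizeof(msg), 0) != sizeof(msg)) {
        close(fd);
        return -1;
    }
    if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) {
        close(fd);
        return -1;
    }

    return fd;   /* keep open: the mux forwards matching MADs to this fd */
}

/* Placeholder usage:
 *   mux_register_gid("/var/run/rdmacm-mux-mlx5_0-2", 0x7efe90fffecb743aULL);
 */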
+-d rdma-device-name Name of RDMA device to register with +-s unix-socket-path Path to unix socket to listen on (default /var/run/rdmacm-mux) +-p rdma-device-port Port number of RDMA device to register with (default 1) +The final UNIX socket file name is a concatenation of the 3 arguments, so +for example for device mlx5_0 on port 2 the socket /var/run/rdmacm-mux-mlx5_0-2 +will be created. + +pvrdma requires this service. + +Please refer to contrib/rdmacm-mux for more details. + + +3.3 Service exposed by libvirt daemon +===================================== +Control over the RDMA device's GID table is done by updating the +device's Ethernet function addresses. +Usually the first GID entry is determined by the MAC address, the second by +the first IPv6 address and the third by the IPv4 address. Other entries can +be added by adding more IP addresses. The opposite also holds, i.e. +whenever an address is removed, the corresponding GID entry is removed. +The process is done by the network and RDMA stacks. Whenever an address is +added, the ib_core driver is notified and calls the device driver add_gid +function, which in turn updates the device. +To support this, the pvrdma device hooks into the create_bind and +destroy_bind HW commands triggered by the pvrdma driver in the guest. + +Whenever a change is made to the pvrdma port's GID table, a special QMP +message is sent to be processed by libvirt, which updates the address of the +backend Ethernet device. + +pvrdma requires the libvirt service to be up. + + +3.4 PCI devices settings +======================== +A RoCE device exposes two functions - an Ethernet function and an RDMA function. +To support this, the pvrdma device is composed of two PCI functions, an Ethernet +device of type vmxnet3 on PCI slot 0 and a PVRDMA device on PCI slot 1. The +Ethernet function can be used for other Ethernet purposes such as IP. + + +3.5 Device parameters +===================== +- netdev: Specifies the Ethernet device function name on the host, for + example enp175s0f0. For a Soft-RoCE device (rxe) this would be the Ethernet + device used to create it. +- ibdev: The IB device name on the host, for example rxe0, mlx5_0, etc. +- mad-chardev: The name of the MAD multiplexer char device. +- ibport: In case of a multi-port device (such as a Mellanox HCA) this + specifies the port to use. If not set, 1 will be used. +- dev-caps-max-mr-size: The maximum size of MR. +- dev-caps-max-qp: Maximum number of QPs. +- dev-caps-max-sge: Maximum number of SGE elements in WR. +- dev-caps-max-cq: Maximum number of CQs. +- dev-caps-max-mr: Maximum number of MRs. +- dev-caps-max-pd: Maximum number of PDs. +- dev-caps-max-ah: Maximum number of AHs. + +Notes: +- The first 3 parameters are mandatory settings, the rest have their + defaults. +- The last 8 parameters (the ones prefixed by dev-caps) define the upper + limits, but the final values are adjusted by the backend device limitations.
+- netdev can be extracted from ibdev's sysfs + (/sys/class/infiniband/<ibdev>/device/net/) + + +3.6 Example +=========== +Define bridge device with vmxnet3 network backend: +<interface type='bridge'> + <mac address='56:b4:44:e9:62:dc'/> + <source bridge='bridge1'/> + <model type='vmxnet3'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' function='0x0' multifunction='on'/> +</interface> + +Define pvrdma device: +<qemu:commandline> + <qemu:arg value='-object'/> + <qemu:arg value='memory-backend-ram,id=mb1,size=1G,share'/> + <qemu:arg value='-numa'/> + <qemu:arg value='node,memdev=mb1'/> + <qemu:arg value='-chardev'/> + <qemu:arg value='socket,path=/var/run/rdmacm-mux-rxe0-1,id=mads'/> + <qemu:arg value='-device'/> + <qemu:arg value='pvrdma,addr=10.1,ibdev=rxe0,netdev=bridge0,mad-chardev=mads'/> +</qemu:commandline> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 1e43cd736d..555c24f21d 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1589,6 +1589,74 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre) g_array_free(tables->vmgenid, mfre); } +/* + * ACPI spec 5.2.5.3 Root System Description Pointer (RSDP). + * (Revision 1.0 or later) + */ +void +build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) +{ + int tbl_off = tbl->len; /* Table offset in the RSDP file */ + + switch (rsdp_data->revision) { + case 0: + /* With ACPI 1.0, we must have an RSDT pointer */ + g_assert(rsdp_data->rsdt_tbl_offset); + break; + case 2: + /* With ACPI 2.0+, we must have an XSDT pointer */ + g_assert(rsdp_data->xsdt_tbl_offset); + break; + default: + /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are valid for RSDP */ + g_assert_not_reached(); + } + + bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16, + true /* fseg memory */); + + g_array_append_vals(tbl, "RSD PTR ", 8); /* Signature */ + build_append_int_noprefix(tbl, 0, 1); /* Checksum */ + g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */ + build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */ + build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */ + if (rsdp_data->rsdt_tbl_offset) { + /* RSDT address to be filled by guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + tbl_off + 16, 4, + ACPI_BUILD_TABLE_FILE, + *rsdp_data->rsdt_tbl_offset); + } + + /* Checksum to be filled by guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + tbl_off, 20, /* ACPI rev 1.0 RSDP size */ + 8); + + if (rsdp_data->revision == 0) { + /* ACPI 1.0 RSDP, we're done */ + return; + } + + build_append_int_noprefix(tbl, 36, 4); /* Length */ + + /* XSDT address to be filled by guest linker */ + build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */ + /* We already validated our xsdt pointer */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + tbl_off + 24, 8, + ACPI_BUILD_TABLE_FILE, + *rsdp_data->xsdt_tbl_offset); + + build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */ + build_append_int_noprefix(tbl, 0, 3); /* Reserved */ + + /* Extended checksum to be filled by Guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + tbl_off, 36, /* ACPI rev 2.0 RSDP size */ + 32); +} + /* Build rsdt table */ void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 80d42e12ff..7bc7a72340 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -30,6 +30,7 @@ #include "hw/hw.h" #include "hw/i386/pc.h" #include 
"hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" #include "hw/acpi/acpi.h" #include "sysemu/sysemu.h" #include "exec/address-spaces.h" @@ -153,6 +154,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev) static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots) { + HotplugHandler *hotplug_ctrl; BusChild *kid, *next; int slot = ctz32(slots); PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel); @@ -170,7 +172,8 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo PCIDevice *dev = PCI_DEVICE(qdev); if (PCI_SLOT(dev->devfn) == slot) { if (!acpi_pcihp_pc_no_hotplug(s, dev)) { - object_unparent(OBJECT(qdev)); + hotplug_ctrl = qdev_get_hotplug_handler(qdev); + hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); } } } @@ -217,25 +220,48 @@ void acpi_pcihp_reset(AcpiPciHpState *s) acpi_pcihp_update(s); } -void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, - DeviceState *dev, Error **errp) +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { - PCIDevice *pdev = PCI_DEVICE(dev); - int slot = PCI_SLOT(pdev->devfn); - int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); - if (bsel < 0) { + /* Only hotplugged devices need the hotplug capability. */ + if (dev->hotplugged && + acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) { error_setg(errp, "Unsupported bus. Bus doesn't have property '" ACPI_PCIHP_PROP_BSEL "' set"); return; } +} + +void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp) +{ + PCIDevice *pdev = PCI_DEVICE(dev); + int slot = PCI_SLOT(pdev->devfn); + int bsel; /* Don't send event when device is enabled during qemu machine creation: * it is present on boot, no hotplug event is necessary. We do send an * event when the device is disabled later. */ if (!dev->hotplugged) { + /* + * Overwrite the default hotplug handler with the ACPI PCI one + * for cold plugged bridges only. 
+ */ + if (!s->legacy_piix && + object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + + qbus_set_hotplug_handler(BUS(sec), DEVICE(hotplug_dev), + &error_abort); + /* We don't have to overwrite any other hotplug handler yet */ + assert(QLIST_EMPTY(&sec->child)); + } + return; } + bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); + g_assert(bsel >= 0); s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } @@ -243,6 +269,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp) { + object_unparent(OBJECT(dev)); +} + +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp) +{ PCIDevice *pdev = PCI_DEVICE(dev); int slot = PCI_SLOT(pdev->devfn); int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index e330f24c71..88f9a9ec09 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -173,6 +173,7 @@ static int vmstate_acpi_post_load(void *opaque, int version_id) PIIX4PMState *s = opaque; pm_io_space_update(s); + smbus_io_space_update(s); return 0; } @@ -370,6 +371,18 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&s->ar); } +static void piix4_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp); + } else if (!object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && + !object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + error_setg(errp, "acpi: device pre plug request for not supported" + " device type: %s", object_get_typename(OBJECT(dev))); + } +} + static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -392,8 +405,7 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } } else { - error_setg(errp, "acpi: device plug request for not supported device" - " type: %s", object_get_typename(OBJECT(dev))); + g_assert_not_reached(); } } @@ -407,8 +419,8 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_memory_unplug_request_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { - acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, - errp); + acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->acpi_pci_hotplug, + dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !s->cpu_hotplug_legacy) { acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); @@ -426,6 +438,9 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, if (s->acpi_memory_hotplug.is_enabled && object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_unplug_cb(&s->acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, + errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !s->cpu_hotplug_legacy) { acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); @@ -435,15 +450,6 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, } } -static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque) -{ - PIIX4PMState *s 
= opaque; - - /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive - * and it's not hot-unpluggable */ - qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort); -} - static void piix4_pm_machine_ready(Notifier *n, void *opaque) { PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready); @@ -457,12 +463,6 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque) pci_conf[0x63] = 0x60; pci_conf[0x67] = (memory_region_present(io_as, 0x3f8) ? 0x08 : 0) | (memory_region_present(io_as, 0x2f8) ? 0x90 : 0); - - if (s->use_acpi_pci_hotplug) { - pci_for_each_bus(pci_get_bus(d), piix4_update_bus_hotplug, s); - } else { - piix4_update_bus_hotplug(pci_get_bus(d), s); - } } static void piix4_pm_add_propeties(PIIX4PMState *s) @@ -536,6 +536,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp) piix4_acpi_system_hot_add_init(pci_address_space_io(dev), pci_get_bus(dev), s); + qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), DEVICE(s), &error_abort); piix4_pm_add_propeties(s); } @@ -702,6 +703,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) */ dc->user_creatable = false; dc->hotpluggable = false; + hc->pre_plug = piix4_device_pre_plug_cb; hc->plug = piix4_device_plug_cb; hc->unplug_request = piix4_device_unplug_request_cb; hc->unplug = piix4_device_unplug_cb; diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 5785fb697c..95fad6f0ce 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -366,36 +366,6 @@ static void acpi_dsdt_add_power_button(Aml *scope) aml_append(scope, dev); } -/* RSDP */ -static GArray * -build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) -{ - AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); - unsigned xsdt_pa_size = sizeof(rsdp->xsdt_physical_address); - unsigned xsdt_pa_offset = - (char *)&rsdp->xsdt_physical_address - rsdp_table->data; - - bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16, - true /* fseg memory */); - - memcpy(&rsdp->signature, "RSD PTR ", sizeof(rsdp->signature)); - memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, sizeof(rsdp->oem_id)); - rsdp->length = cpu_to_le32(sizeof(*rsdp)); - rsdp->revision = 0x02; - - /* Address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_RSDP_FILE, xsdt_pa_offset, xsdt_pa_size, - ACPI_BUILD_TABLE_FILE, xsdt_tbl_offset); - - /* Checksum to be filled by Guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, sizeof *rsdp, - (char *)&rsdp->checksum - rsdp_table->data); - - return rsdp_table; -} - static void build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { @@ -854,7 +824,15 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); /* RSDP is in FSEG memory, so allocate it separately */ - build_rsdp(tables->rsdp, tables->linker, xsdt); + { + AcpiRsdpData rsdp_data = { + .revision = 2, + .oem_id = ACPI_BUILD_APPNAME6, + .xsdt_tbl_offset = &xsdt, + .rsdt_tbl_offset = NULL, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + } /* Cleanup memory that's no longer used. 
*/ g_array_free(table_offsets, true); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 5b678237b7..c2641e56ea 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -55,7 +55,7 @@ #include "hw/intc/arm_gic.h" #include "hw/intc/arm_gicv3_common.h" #include "kvm_arm.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "qapi/visitor.h" #include "standard-headers/linux/input.h" #include "hw/arm/smmuv3.h" diff --git a/hw/core/machine.c b/hw/core/machine.c index c51423b647..4439ea663f 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -653,8 +653,10 @@ static void machine_class_base_init(ObjectClass *oc, void *data) static void machine_initfn(Object *obj) { MachineState *ms = MACHINE(obj); + MachineClass *mc = MACHINE_GET_CLASS(obj); ms->kernel_irqchip_allowed = true; + ms->kernel_irqchip_split = mc->default_kernel_irqchip_split; ms->kvm_shadow_mem = -1; ms->dump_guest_core = true; ms->mem_merge = true; diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index bd84c4ea4c..943dc2654b 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1297,3 +1297,179 @@ const PropertyInfo qdev_prop_off_auto_pcibar = { .set = set_enum, .set_default_value = set_default_value_enum, }; + +/* --- PCIELinkSpeed 2_5/5/8/16 -- */ + +static void get_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop); + int speed; + + switch (*p) { + case QEMU_PCI_EXP_LNK_2_5GT: + speed = PCIE_LINK_SPEED_2_5; + break; + case QEMU_PCI_EXP_LNK_5GT: + speed = PCIE_LINK_SPEED_5; + break; + case QEMU_PCI_EXP_LNK_8GT: + speed = PCIE_LINK_SPEED_8; + break; + case QEMU_PCI_EXP_LNK_16GT: + speed = PCIE_LINK_SPEED_16; + break; + default: + /* Unreachable */ + abort(); + } + + visit_type_enum(v, prop->name, &speed, prop->info->enum_table, errp); +} + +static void set_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop); + int speed; + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_enum(v, prop->name, &speed, prop->info->enum_table, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + switch (speed) { + case PCIE_LINK_SPEED_2_5: + *p = QEMU_PCI_EXP_LNK_2_5GT; + break; + case PCIE_LINK_SPEED_5: + *p = QEMU_PCI_EXP_LNK_5GT; + break; + case PCIE_LINK_SPEED_8: + *p = QEMU_PCI_EXP_LNK_8GT; + break; + case PCIE_LINK_SPEED_16: + *p = QEMU_PCI_EXP_LNK_16GT; + break; + default: + /* Unreachable */ + abort(); + } +} + +const PropertyInfo qdev_prop_pcie_link_speed = { + .name = "PCIELinkSpeed", + .description = "2_5/5/8/16", + .enum_table = &PCIELinkSpeed_lookup, + .get = get_prop_pcielinkspeed, + .set = set_prop_pcielinkspeed, + .set_default_value = set_default_value_enum, +}; + +/* --- PCIELinkWidth 1/2/4/8/12/16/32 -- */ + +static void get_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop); + int width; + + switch (*p) { + case QEMU_PCI_EXP_LNK_X1: + width = PCIE_LINK_WIDTH_1; + break; + case QEMU_PCI_EXP_LNK_X2: + width = PCIE_LINK_WIDTH_2; + break; + case QEMU_PCI_EXP_LNK_X4: + width = PCIE_LINK_WIDTH_4; + break; + case 
QEMU_PCI_EXP_LNK_X8: + width = PCIE_LINK_WIDTH_8; + break; + case QEMU_PCI_EXP_LNK_X12: + width = PCIE_LINK_WIDTH_12; + break; + case QEMU_PCI_EXP_LNK_X16: + width = PCIE_LINK_WIDTH_16; + break; + case QEMU_PCI_EXP_LNK_X32: + width = PCIE_LINK_WIDTH_32; + break; + default: + /* Unreachable */ + abort(); + } + + visit_type_enum(v, prop->name, &width, prop->info->enum_table, errp); +} + +static void set_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop); + int width; + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_enum(v, prop->name, &width, prop->info->enum_table, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + switch (width) { + case PCIE_LINK_WIDTH_1: + *p = QEMU_PCI_EXP_LNK_X1; + break; + case PCIE_LINK_WIDTH_2: + *p = QEMU_PCI_EXP_LNK_X2; + break; + case PCIE_LINK_WIDTH_4: + *p = QEMU_PCI_EXP_LNK_X4; + break; + case PCIE_LINK_WIDTH_8: + *p = QEMU_PCI_EXP_LNK_X8; + break; + case PCIE_LINK_WIDTH_12: + *p = QEMU_PCI_EXP_LNK_X12; + break; + case PCIE_LINK_WIDTH_16: + *p = QEMU_PCI_EXP_LNK_X16; + break; + case PCIE_LINK_WIDTH_32: + *p = QEMU_PCI_EXP_LNK_X32; + break; + default: + /* Unreachable */ + abort(); + } +} + +const PropertyInfo qdev_prop_pcie_link_width = { + .name = "PCIELinkWidth", + .description = "1/2/4/8/12/16/32", + .enum_table = &PCIELinkWidth_lookup, + .get = get_prop_pcielinkwidth, + .set = set_prop_pcielinkwidth, + .set_default_value = set_default_value_enum, +}; diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index cece4aa495..faf76a8bc4 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -69,9 +69,8 @@ static void virtio_gpu_initfn(Object *obj) TYPE_VIRTIO_GPU); } -static const TypeInfo virtio_gpu_pci_info = { - .name = TYPE_VIRTIO_GPU_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = { + .generic_name = TYPE_VIRTIO_GPU_PCI, .instance_size = sizeof(VirtIOGPUPCI), .instance_init = virtio_gpu_initfn, .class_init = virtio_gpu_pci_class_init, @@ -79,6 +78,6 @@ static const TypeInfo virtio_gpu_pci_info = { static void virtio_gpu_pci_register_types(void) { - type_register_static(&virtio_gpu_pci_info); + virtio_pci_types_register(&virtio_gpu_pci_info); } type_init(virtio_gpu_pci_register_types) diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index ab2e369b28..8db4d916f2 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -207,9 +207,8 @@ static void virtio_vga_inst_initfn(Object *obj) TYPE_VIRTIO_GPU); } -static TypeInfo virtio_vga_info = { - .name = TYPE_VIRTIO_VGA, - .parent = TYPE_VIRTIO_PCI, +static VirtioPCIDeviceTypeInfo virtio_vga_info = { + .generic_name = TYPE_VIRTIO_VGA, .instance_size = sizeof(struct VirtIOVGA), .instance_init = virtio_vga_inst_initfn, .class_init = virtio_vga_class_init, @@ -217,7 +216,7 @@ static TypeInfo virtio_vga_info = { static void virtio_vga_register_types(void) { - type_register_static(&virtio_vga_info); + virtio_pci_types_register(&virtio_vga_info); } type_init(virtio_vga_register_types) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 236a20eaa8..14f757fc36 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2426,7 +2426,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) IntelIOMMUState *intel_iommu = 
INTEL_IOMMU_DEVICE(iommu); assert(iommu); - if (iommu->intr_supported) { + if (x86_iommu_ir_supported(iommu)) { dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */ } @@ -2499,7 +2499,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) * When interrupt remapping is supported, we add a special IVHD device * for type IO-APIC. */ - if (x86_iommu_get_default()->intr_supported) { + if (x86_iommu_ir_supported(x86_iommu_get_default())) { ivhd_table_len += 8; } /* IVHD length */ @@ -2535,7 +2535,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) * Linux IOMMU driver checks for the special IVHD device (type IO-APIC). * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059' */ - if (x86_iommu_get_default()->intr_supported) { + if (x86_iommu_ir_supported(x86_iommu_get_default())) { build_append_int_noprefix(table_data, (0x1ull << 56) | /* type IOAPIC */ (IOAPIC_SB_DEVID << 40) | /* IOAPIC devid */ @@ -2547,32 +2547,6 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) "IVRS", table_data->len - iommu_start, 1, NULL, NULL); } -static GArray * -build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) -{ - AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); - unsigned rsdt_pa_size = sizeof(rsdp->rsdt_physical_address); - unsigned rsdt_pa_offset = - (char *)&rsdp->rsdt_physical_address - rsdp_table->data; - - bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16, - true /* fseg memory */); - - memcpy(&rsdp->signature, "RSD PTR ", 8); - memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, 6); - /* Address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_RSDP_FILE, rsdt_pa_offset, rsdt_pa_size, - ACPI_BUILD_TABLE_FILE, rsdt_tbl_offset); - - /* Checksum to be filled by Guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, sizeof *rsdp, - (char *)&rsdp->checksum - rsdp_table->data); - - return rsdp_table; -} - typedef struct AcpiBuildState { /* Copy of table in RAM (for patching). */ @@ -2729,7 +2703,25 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) slic_oem.id, slic_oem.table_id); /* RSDP is in FSEG memory, so allocate it separately */ - build_rsdp(tables->rsdp, tables->linker, rsdt); + { + AcpiRsdpData rsdp_data = { + .revision = 0, + .oem_id = ACPI_BUILD_APPNAME6, + .xsdt_tbl_offset = NULL, + .rsdt_tbl_offset = &rsdt, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + if (!pcmc->rsdp_in_ram) { + /* We used to allocate some extra space for RSDP revision 2 but + * only used the RSDP revision 0 space. The extra bytes were + * zeroed out and not used. + * Here we continue wasting those extra 16 bytes to make sure we + * don't break migration for machine types 2.2 and older due to + * RSDP blob size mismatch. + */ + build_append_int_noprefix(tables->rsdp, 0, 16); + } + } /* We'll expose it all to Guest so we want to reduce * chance of size changes. diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 353a810e6b..8ad707aba0 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1233,7 +1233,7 @@ static int amdvi_int_remap_msi(AMDVIState *iommu, } /* validate that we are configure with intremap=on */ - if (!X86_IOMMU_DEVICE(iommu)->intr_supported) { + if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) { trace_amdvi_err("Interrupt remapping is enabled in the guest but " "not in the host. 
Use intremap=on to enable interrupt " "remapping in amd-iommu."); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index d97bcbc2f7..8b72735650 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -524,7 +524,6 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, addr = s->root + index * sizeof(*re); if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) { - trace_vtd_re_invalid(re->rsvd, re->val); re->val = 0; return -VTD_FR_ROOT_TABLE_INV; } @@ -545,7 +544,6 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, /* we have checked that root entry is present */ addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce); if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) { - trace_vtd_re_invalid(root->rsvd, root->val); return -VTD_FR_CONTEXT_TABLE_INV; } ce->lo = le64_to_cpu(ce->lo); @@ -630,16 +628,20 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, break; case VTD_CONTEXT_TT_DEV_IOTLB: if (!x86_iommu->dt_supported) { + error_report_once("%s: DT specified but not supported", __func__); return false; } break; case VTD_CONTEXT_TT_PASS_THROUGH: if (!x86_iommu->pt_supported) { + error_report_once("%s: PT specified but not supported", __func__); return false; } break; default: /* Unknwon type */ + error_report_once("%s: unknown ce type: %"PRIu32, __func__, + vtd_ce_get_type(ce)); return false; } return true; @@ -1003,7 +1005,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, } if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) { - trace_vtd_re_invalid(re.rsvd, re.val); + error_report_once("%s: invalid root entry: rsvd=0x%"PRIx64 + ", val=0x%"PRIx64" (reserved nonzero)", + __func__, re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -1020,19 +1024,23 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) { - trace_vtd_ce_invalid(ce->hi, ce->lo); + error_report_once("%s: invalid context entry: hi=%"PRIx64 + ", lo=%"PRIx64" (reserved nonzero)", + __func__, ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } /* Check if the programming of context-entry is valid */ if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) { - trace_vtd_ce_invalid(ce->hi, ce->lo); + error_report_once("%s: invalid context entry: hi=%"PRIx64 + ", lo=%"PRIx64" (level %d not supported)", + __func__, ce->hi, ce->lo, vtd_ce_get_level(ce)); return -VTD_FR_CONTEXT_ENTRY_INV; } /* Do translation type check */ if (!vtd_ce_type_check(x86_iommu, ce)) { - trace_vtd_ce_invalid(ce->hi, ce->lo); + /* Errors dumped in vtd_ce_type_check() */ return -VTD_FR_CONTEXT_ENTRY_INV; } @@ -1878,7 +1886,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { - trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 + " (reserved nonzero)", __func__, inv_desc->hi, + inv_desc->lo); return false; } if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { @@ -1901,7 +1911,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) /* Interrupt flag */ vtd_generate_completion_event(s); } else { - trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 + " (unknown type)", __func__, inv_desc->hi, + inv_desc->lo); 
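
error_report_once() is what makes these conversions safe on guest-triggerable paths: each call site prints at most once, so a guest programming bogus descriptors cannot flood the log the way an unconditional error_report() would. A minimal standalone approximation of that behaviour (simplified; the real QEMU macro additionally routes through error_report() and its semantics here are the editor's summary):

#include <stdarg.h>
#include <stdio.h>

/* Simplified stand-in for QEMU's error_report_once(): a per-call-site static
 * flag ensures the message is emitted only the first time it is reached. */
#define error_report_once(fmt, ...)                          \
    ({                                                       \
        static int reported_;                                \
        int first_ = !reported_;                             \
        if (first_) {                                        \
            reported_ = 1;                                   \
            fprintf(stderr, fmt "\n", ##__VA_ARGS__);        \
        }                                                    \
        first_;                                              \
    })

int main(void)
{
    for (int i = 0; i < 3; i++) {
        /* only the first iteration prints */
        error_report_once("%s: invalid root entry (reserved nonzero)", __func__);
    }
    return 0;
}
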
return false; } return true; @@ -1913,7 +1925,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s, uint16_t sid, fmask; if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { - trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64 + " (reserved nonzero)", __func__, inv_desc->hi, + inv_desc->lo); return false; } switch (inv_desc->lo & VTD_INV_DESC_CC_G) { @@ -1932,7 +1946,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s, break; default: - trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64 + " (invalid type)", __func__, inv_desc->hi, + inv_desc->lo); return false; } return true; @@ -1946,7 +1962,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (reserved bits unzero)\n", + __func__, inv_desc->hi, inv_desc->lo); return false; } @@ -1965,14 +1983,20 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); if (am > VTD_MAMV) { - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (am=%u > VTD_MAMV=%u)\n", + __func__, inv_desc->hi, inv_desc->lo, + am, (unsigned)VTD_MAMV); return false; } vtd_iotlb_page_invalidate(s, domain_id, addr, am); break; default: - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (type mismatch: 0x%llx)\n", + __func__, inv_desc->hi, inv_desc->lo, + inv_desc->lo & VTD_INV_DESC_IOTLB_G); return false; } return true; @@ -2012,7 +2036,9 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) { - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid dev-iotlb inv desc: hi=%"PRIx64 + ", lo=%"PRIx64" (reserved nonzero)", __func__, + inv_desc->hi, inv_desc->lo); return false; } @@ -2103,7 +2129,9 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) break; default: - trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo); + error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64 + " (unknown type)", __func__, inv_desc.hi, + inv_desc.lo); return false; } s->iq_head++; @@ -2540,7 +2568,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr, __func__, pci_bus_num(vtd_as->bus), VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), - iotlb.iova); + addr); } return iotlb; @@ -2628,9 +2656,10 @@ static Property vtd_properties[] = { DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), - DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits, + DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits, VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), + DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_END_OF_LIST(), }; @@ -3119,6 +3148,9 @@ static void 
vtd_init(IntelIOMMUState *s) s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + if (s->dma_drain) { + s->cap |= VTD_CAP_DRAIN; + } if (s->aw_bits == VTD_HOST_AW_48BIT) { s->cap |= VTD_CAP_SAGAW_48bit; } @@ -3137,7 +3169,7 @@ static void vtd_init(IntelIOMMUState *s) vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits); - if (x86_iommu->intr_supported) { + if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { s->ecap |= VTD_ECAP_EIM; @@ -3238,14 +3270,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) { + if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu_ir_supported(x86_iommu)) { error_setg(errp, "eim=on cannot be selected without intremap=on"); return false; } if (s->intr_eim == ON_OFF_AUTO_AUTO) { s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim) - && x86_iommu->intr_supported ? + && x86_iommu_ir_supported(x86_iommu) ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index d084099ed9..00e9edbc66 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -203,6 +203,9 @@ #define VTD_CAP_MAMV (VTD_MAMV << 48) #define VTD_CAP_PSI (1ULL << 39) #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) +#define VTD_CAP_DRAIN_WRITE (1ULL << 54) +#define VTD_CAP_DRAIN_READ (1ULL << 55) +#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) /* Supported Adjusted Guest Address Widths */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 115bc2825c..f248662e97 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -37,7 +37,7 @@ #include "hw/pci/pci_bus.h" #include "hw/nvram/fw_cfg.h" #include "hw/timer/hpet.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/loader.h" #include "elf.h" #include "multiboot.h" @@ -1244,7 +1244,7 @@ void pc_machine_done(Notifier *notifier, void *data) if (pcms->apic_id_limit > 255 && !xen_enabled()) { IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); - if (!iommu || !iommu->x86_iommu.intr_supported || + if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) || iommu->intr_eim != ON_OFF_AUTO_ON) { error_report("current -smp configuration requires " "Extended Interrupt Mode enabled. 
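
The new dma-drain property only toggles two bits of the VT-d capability register; with its default of true a guest sees both read- and write-drain support advertised. A quick standalone check of the resulting bit pattern, using the VTD_CAP_DRAIN_* definitions added in intel_iommu_internal.h above:

#include <inttypes.h>
#include <stdio.h>

#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ  (1ULL << 55)
#define VTD_CAP_DRAIN       (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)

int main(void)
{
    uint64_t cap = 0;
    int dma_drain = 1;              /* the property's default value */

    if (dma_drain) {
        cap |= VTD_CAP_DRAIN;       /* mirrors the vtd_init() hunk above */
    }
    /* prints 0x00c0000000000000 */
    printf("cap |= 0x%016" PRIx64 "\n", cap);
    return 0;
}
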
" diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6981cfa740..7f1cb527b5 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -30,7 +30,7 @@ #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/display/ramfb.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/pci/pci.h" #include "hw/pci/pci_ids.h" #include "hw/usb.h" @@ -368,7 +368,7 @@ static void pc_compat_1_2(MachineState *machine) x86_cpu_change_kvm_default("kvm-pv-eoi", NULL); } -/* PC compat function for pc-0.10 to pc-0.13 */ +/* PC compat function for pc-0.12 and pc-0.13 */ static void pc_compat_0_13(MachineState *machine) { pc_compat_1_2(machine); @@ -834,6 +834,7 @@ static void pc_i440fx_0_15_machine_options(MachineClass *m) { pc_i440fx_1_0_machine_options(m); m->hw_version = "0.15"; + m->deprecation_reason = "use a newer machine type instead"; SET_MACHINE_COMPAT(m, PC_COMPAT_0_15); } @@ -951,73 +952,6 @@ static void pc_i440fx_0_12_machine_options(MachineClass *m) DEFINE_I440FX_MACHINE(v0_12, "pc-0.12", pc_compat_0_13, pc_i440fx_0_12_machine_options); - -#define PC_COMPAT_0_11 \ - PC_CPU_MODEL_IDS("0.11") \ - {\ - .driver = "virtio-blk-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = TYPE_PCI_DEVICE,\ - .property = "rombar",\ - .value = stringify(0),\ - },{\ - .driver = "ide-drive",\ - .property = "ver",\ - .value = "0.11",\ - },{\ - .driver = "scsi-disk",\ - .property = "ver",\ - .value = "0.11",\ - }, - -static void pc_i440fx_0_11_machine_options(MachineClass *m) -{ - pc_i440fx_0_12_machine_options(m); - m->hw_version = "0.11"; - m->deprecation_reason = "use a newer machine type instead"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_11); -} - -DEFINE_I440FX_MACHINE(v0_11, "pc-0.11", pc_compat_0_13, - pc_i440fx_0_11_machine_options); - - -#define PC_COMPAT_0_10 \ - PC_CPU_MODEL_IDS("0.10") \ - {\ - .driver = "virtio-blk-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_STORAGE_OTHER),\ - },{\ - .driver = "virtio-serial-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_DISPLAY_OTHER),\ - },{\ - .driver = "virtio-net-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = "ide-drive",\ - .property = "ver",\ - .value = "0.10",\ - },{\ - .driver = "scsi-disk",\ - .property = "ver",\ - .value = "0.10",\ - }, - -static void pc_i440fx_0_10_machine_options(MachineClass *m) -{ - pc_i440fx_0_11_machine_options(m); - m->hw_version = "0.10"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_10); -} - -DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13, - pc_i440fx_0_10_machine_options); - typedef struct { uint16_t gpu_device_id; uint16_t pch_device_id; diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 58459bdab5..0a3f3f18e4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -47,7 +47,7 @@ #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" #include "hw/display/ramfb.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/ide/pci.h" #include "hw/ide/ahci.h" #include "hw/usb.h" @@ -304,6 +304,7 @@ static void pc_q35_machine_options(MachineClass *m) m->units_per_default_bus = 1; m->default_machine_opts = "firmware=bios-256k.bin"; m->default_display = "std"; + m->default_kernel_irqchip_split = true; m->no_floppy = 1; machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); @@ -323,6 +324,7 @@ DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL, static void pc_q35_3_1_machine_options(MachineClass *m) { 
pc_q35_4_0_machine_options(m); + m->default_kernel_irqchip_split = false; m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_3_1); } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 6ac347d18c..77244fc384 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -5,19 +5,15 @@ x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC inv # hw/i386/intel_iommu.c vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64 -vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16 vtd_inv_desc_cc_global(void) "context invalidate globally" vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8 vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16 -vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_iotlb_global(void) "iotlb invalidate global" vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16 vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8 -vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32 vtd_inv_desc_wait_irq(const char *msg) "%s" -vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32 vtd_inv_qi_enable(bool enable) "enabled %d" @@ -27,9 +23,7 @@ vtd_inv_qi_tail(uint16_t head) "write tail %d" vtd_inv_qi_fetch(void) "" vtd_context_cache_reset(void) "" vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present" -vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present" -vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16 vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16 vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32 diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index abc3c03158..d1534c1ae0 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -112,6 +112,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) PCMachineState *pcms = PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); QLIST_INIT(&x86_iommu->iec_notifiers); + bool irq_all_kernel = kvm_irqchip_in_kernel() && !kvm_irqchip_is_split(); if (!pcms || !pcms->bus) { error_setg(errp, "Machine-type '%s' not supported by IOMMU", @@ -119,9 +120,14 
@@ static void x86_iommu_realize(DeviceState *dev, Error **errp) return; } + /* If the user didn't specify IR, choose a default value for it */ + if (x86_iommu->intr_supported == ON_OFF_AUTO_AUTO) { + x86_iommu->intr_supported = irq_all_kernel ? + ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON; + } + /* Both Intel and AMD IOMMU IR only support "kernel-irqchip={off|split}" */ - if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && - !kvm_irqchip_is_split()) { + if (x86_iommu_ir_supported(x86_iommu) && irq_all_kernel) { error_setg(errp, "Interrupt Remapping cannot work with " "kernel-irqchip=on, please use 'split|off'."); return; @@ -135,7 +141,8 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) } static Property x86_iommu_properties[] = { - DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false), + DEFINE_PROP_ON_OFF_AUTO("intremap", X86IOMMUState, + intr_supported, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false), DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true), DEFINE_PROP_END_OF_LIST(), @@ -148,6 +155,11 @@ static void x86_iommu_class_init(ObjectClass *klass, void *data) dc->props = x86_iommu_properties; } +bool x86_iommu_ir_supported(X86IOMMUState *s) +{ + return s->intr_supported == ON_OFF_AUTO_ON; +} + static const TypeInfo x86_iommu_info = { .name = TYPE_X86_IOMMU_DEVICE, .parent = TYPE_SYS_BUS_DEVICE, diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 0e9963f5ee..301a8e972d 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -37,6 +37,8 @@ obj-$(CONFIG_SH4) += sh_intc.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o obj-$(CONFIG_XICS_KVM) += xics_kvm.o +obj-$(CONFIG_XIVE) += xive.o +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o obj-$(CONFIG_POWERNV) += xics_pnv.o obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o obj-$(CONFIG_S390_FLIC) += s390_flic.o diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c new file mode 100644 index 0000000000..0e39c90cbd --- /dev/null +++ b/hw/intc/spapr_xive.c @@ -0,0 +1,1486 @@ +/* + * QEMU PowerPC sPAPR XIVE interrupt controller model + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "target/ppc/cpu.h" +#include "sysemu/cpus.h" +#include "monitor/monitor.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_xive.h" +#include "hw/ppc/xive.h" +#include "hw/ppc/xive_regs.h" + +/* + * XIVE Virtualization Controller BAR and Thread Managment BAR that we + * use for the ESB pages and the TIMA pages + */ +#define SPAPR_XIVE_VC_BASE 0x0006010000000000ull +#define SPAPR_XIVE_TM_BASE 0x0006030203180000ull + +/* + * The allocation of VP blocks is a complex operation in OPAL and the + * VP identifiers have a relation with the number of HW chips, the + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE + * controller model does not have the same constraints and can use a + * simple mapping scheme of the CPU vcpu_id + * + * These identifiers are never returned to the OS. + */ + +#define SPAPR_XIVE_NVT_BASE 0x400 + +/* + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value + * to the controller block id value. It can nevertheless be changed + * for testing purpose. 
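
With intremap now a tri-state, the question "is interrupt remapping on?" has to go through x86_iommu_ir_supported() instead of reading the field directly, and the AUTO default is resolved at realize time from the irqchip mode. A small standalone sketch of that resolution logic (the OnOffAuto enum is approximated here; the real one is generated from QAPI):

#include <stdbool.h>
#include <stdio.h>

/* Approximation of QEMU's OnOffAuto for illustration only. */
typedef enum { ON_OFF_AUTO_AUTO, ON_OFF_AUTO_ON, ON_OFF_AUTO_OFF } OnOffAuto;

static OnOffAuto resolve_intremap(OnOffAuto requested, bool irq_all_kernel)
{
    if (requested == ON_OFF_AUTO_AUTO) {
        /* mirrors x86_iommu_realize(): default to on unless the whole
         * irqchip is in the kernel, which IR cannot work with */
        return irq_all_kernel ? ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON;
    }
    return requested;
}

static bool ir_supported(OnOffAuto resolved)
{
    return resolved == ON_OFF_AUTO_ON;  /* same test as x86_iommu_ir_supported() */
}

int main(void)
{
    printf("intremap=auto, kernel-irqchip=on    -> IR %s\n",
           ir_supported(resolve_intremap(ON_OFF_AUTO_AUTO, true)) ? "on" : "off");
    printf("intremap=auto, kernel-irqchip=split -> IR %s\n",
           ir_supported(resolve_intremap(ON_OFF_AUTO_AUTO, false)) ? "on" : "off");
    return 0;
}
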
+ */ +#define SPAPR_XIVE_BLOCK_ID 0x0 + +/* + * sPAPR NVT and END indexing helpers + */ +static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx) +{ + return nvt_idx - SPAPR_XIVE_NVT_BASE; +} + +static void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu, + uint8_t *out_nvt_blk, uint32_t *out_nvt_idx) +{ + assert(cpu); + + if (out_nvt_blk) { + *out_nvt_blk = SPAPR_XIVE_BLOCK_ID; + } + + if (out_nvt_blk) { + *out_nvt_idx = SPAPR_XIVE_NVT_BASE + cpu->vcpu_id; + } +} + +static int spapr_xive_target_to_nvt(uint32_t target, + uint8_t *out_nvt_blk, uint32_t *out_nvt_idx) +{ + PowerPCCPU *cpu = spapr_find_cpu(target); + + if (!cpu) { + return -1; + } + + spapr_xive_cpu_to_nvt(cpu, out_nvt_blk, out_nvt_idx); + return 0; +} + +/* + * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8 + * priorities per CPU + */ +static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio, + uint8_t *out_end_blk, uint32_t *out_end_idx) +{ + assert(cpu); + + if (out_end_blk) { + *out_end_blk = SPAPR_XIVE_BLOCK_ID; + } + + if (out_end_idx) { + *out_end_idx = (cpu->vcpu_id << 3) + prio; + } +} + +static int spapr_xive_target_to_end(uint32_t target, uint8_t prio, + uint8_t *out_end_blk, uint32_t *out_end_idx) +{ + PowerPCCPU *cpu = spapr_find_cpu(target); + + if (!cpu) { + return -1; + } + + spapr_xive_cpu_to_end(cpu, prio, out_end_blk, out_end_idx); + return 0; +} + +/* + * On sPAPR machines, use a simplified output for the XIVE END + * structure dumping only the information related to the OS EQ. + */ +static void spapr_xive_end_pic_print_info(sPAPRXive *xive, XiveEND *end, + Monitor *mon) +{ + uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); + uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1); + uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); + uint32_t qentries = 1 << (qsize + 10); + uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6); + uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7); + + monitor_printf(mon, "%3d/%d % 6d/%5d ^%d", + spapr_xive_nvt_to_target(0, nvt), + priority, qindex, qentries, qgen); + + xive_end_queue_pic_print_info(end, 6, mon); + monitor_printf(mon, "]"); +} + +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon) +{ + XiveSource *xsrc = &xive->source; + int i; + + monitor_printf(mon, " LSIN PQ EISN CPU/PRIO EQ\n"); + + for (i = 0; i < xive->nr_irqs; i++) { + uint8_t pq = xive_source_esb_get(xsrc, i); + XiveEAS *eas = &xive->eat[i]; + + if (!xive_eas_is_valid(eas)) { + continue; + } + + monitor_printf(mon, " %08x %s %c%c%c %s %08x ", i, + xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", + pq & XIVE_ESB_VAL_P ? 'P' : '-', + pq & XIVE_ESB_VAL_Q ? 'Q' : '-', + xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ', + xive_eas_is_masked(eas) ? "M" : " ", + (int) xive_get_field64(EAS_END_DATA, eas->w)); + + if (!xive_eas_is_masked(eas)) { + uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); + XiveEND *end; + + assert(end_idx < xive->nr_ends); + end = &xive->endt[end_idx]; + + if (xive_end_is_valid(end)) { + spapr_xive_end_pic_print_info(xive, end, mon); + } + } + monitor_printf(mon, "\n"); + } +} + +static void spapr_xive_map_mmio(sPAPRXive *xive) +{ + sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base); + sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base); + sysbus_mmio_map(SYS_BUS_DEVICE(xive), 2, xive->tm_base); +} + +/* + * When a Virtual Processor is scheduled to run on a HW thread, the + * hypervisor pushes its identifier in the OS CAM line. Emulate the + * same behavior under QEMU. 
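
The indexing scheme above is deliberately trivial: an NVT index is just SPAPR_XIVE_NVT_BASE plus the vcpu_id, and an END index packs eight priorities per vCPU. A standalone sketch of the arithmetic, handy when decoding the "info pic" output produced by spapr_xive_pic_print_info():

#include <stdio.h>

#define SPAPR_XIVE_NVT_BASE 0x400

static unsigned cpu_to_nvt_idx(unsigned vcpu_id)
{
    return SPAPR_XIVE_NVT_BASE + vcpu_id;        /* spapr_xive_cpu_to_nvt() */
}

static unsigned nvt_idx_to_target(unsigned nvt_idx)
{
    return nvt_idx - SPAPR_XIVE_NVT_BASE;        /* spapr_xive_nvt_to_target() */
}

static unsigned cpu_to_end_idx(unsigned vcpu_id, unsigned prio)
{
    return (vcpu_id << 3) + prio;                /* 8 priorities per vCPU */
}

int main(void)
{
    unsigned vcpu = 3;

    printf("vcpu %u -> NVT idx 0x%x -> target %u\n",
           vcpu, cpu_to_nvt_idx(vcpu), nvt_idx_to_target(cpu_to_nvt_idx(vcpu)));
    printf("vcpu %u, priority 5 -> END idx %u\n", vcpu, cpu_to_end_idx(vcpu, 5));
    return 0;
}
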
+ */ +void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx) +{ + uint8_t nvt_blk; + uint32_t nvt_idx; + uint32_t nvt_cam; + + spapr_xive_cpu_to_nvt(POWERPC_CPU(tctx->cs), &nvt_blk, &nvt_idx); + + nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_nvt_cam_line(nvt_blk, nvt_idx)); + memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4); +} + +static void spapr_xive_end_reset(XiveEND *end) +{ + memset(end, 0, sizeof(*end)); + + /* switch off the escalation and notification ESBs */ + end->w1 = cpu_to_be32(END_W1_ESe_Q | END_W1_ESn_Q); +} + +static void spapr_xive_reset(void *dev) +{ + sPAPRXive *xive = SPAPR_XIVE(dev); + int i; + + /* + * The XiveSource has its own reset handler, which mask off all + * IRQs (!P|Q) + */ + + /* Mask all valid EASs in the IRQ number space. */ + for (i = 0; i < xive->nr_irqs; i++) { + XiveEAS *eas = &xive->eat[i]; + if (xive_eas_is_valid(eas)) { + eas->w = cpu_to_be64(EAS_VALID | EAS_MASKED); + } else { + eas->w = 0; + } + } + + /* Clear all ENDs */ + for (i = 0; i < xive->nr_ends; i++) { + spapr_xive_end_reset(&xive->endt[i]); + } +} + +static void spapr_xive_instance_init(Object *obj) +{ + sPAPRXive *xive = SPAPR_XIVE(obj); + + object_initialize(&xive->source, sizeof(xive->source), TYPE_XIVE_SOURCE); + object_property_add_child(obj, "source", OBJECT(&xive->source), NULL); + + object_initialize(&xive->end_source, sizeof(xive->end_source), + TYPE_XIVE_END_SOURCE); + object_property_add_child(obj, "end_source", OBJECT(&xive->end_source), + NULL); +} + +static void spapr_xive_realize(DeviceState *dev, Error **errp) +{ + sPAPRXive *xive = SPAPR_XIVE(dev); + XiveSource *xsrc = &xive->source; + XiveENDSource *end_xsrc = &xive->end_source; + Error *local_err = NULL; + + if (!xive->nr_irqs) { + error_setg(errp, "Number of interrupt needs to be greater 0"); + return; + } + + if (!xive->nr_ends) { + error_setg(errp, "Number of interrupt needs to be greater 0"); + return; + } + + /* + * Initialize the internal sources, for IPIs and virtual devices. 
+ */ + object_property_set_int(OBJECT(xsrc), xive->nr_irqs, "nr-irqs", + &error_fatal); + object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(xive), + &error_fatal); + object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * Initialize the END ESB source + */ + object_property_set_int(OBJECT(end_xsrc), xive->nr_irqs, "nr-ends", + &error_fatal); + object_property_add_const_link(OBJECT(end_xsrc), "xive", OBJECT(xive), + &error_fatal); + object_property_set_bool(OBJECT(end_xsrc), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Set the mapping address of the END ESB pages after the source ESBs */ + xive->end_base = xive->vc_base + (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + + /* + * Allocate the routing tables + */ + xive->eat = g_new0(XiveEAS, xive->nr_irqs); + xive->endt = g_new0(XiveEND, xive->nr_ends); + + /* TIMA initialization */ + memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive, + "xive.tima", 4ull << TM_SHIFT); + + /* Define all XIVE MMIO regions on SysBus */ + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio); + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); + + /* Map all regions */ + spapr_xive_map_mmio(xive); + + qemu_register_reset(spapr_xive_reset, dev); +} + +static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk, + uint32_t eas_idx, XiveEAS *eas) +{ + sPAPRXive *xive = SPAPR_XIVE(xrtr); + + if (eas_idx >= xive->nr_irqs) { + return -1; + } + + *eas = xive->eat[eas_idx]; + return 0; +} + +static int spapr_xive_get_end(XiveRouter *xrtr, + uint8_t end_blk, uint32_t end_idx, XiveEND *end) +{ + sPAPRXive *xive = SPAPR_XIVE(xrtr); + + if (end_idx >= xive->nr_ends) { + return -1; + } + + memcpy(end, &xive->endt[end_idx], sizeof(XiveEND)); + return 0; +} + +static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + uint8_t word_number) +{ + sPAPRXive *xive = SPAPR_XIVE(xrtr); + + if (end_idx >= xive->nr_ends) { + return -1; + } + + memcpy(&xive->endt[end_idx], end, sizeof(XiveEND)); + return 0; +} + +static int spapr_xive_get_nvt(XiveRouter *xrtr, + uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt) +{ + uint32_t vcpu_id = spapr_xive_nvt_to_target(nvt_blk, nvt_idx); + PowerPCCPU *cpu = spapr_find_cpu(vcpu_id); + + if (!cpu) { + /* TODO: should we assert() if we can find a NVT ? */ + return -1; + } + + /* + * sPAPR does not maintain a NVT table. Return that the NVT is + * valid if we have found a matching CPU + */ + nvt->w0 = cpu_to_be32(NVT_W0_VALID); + return 0; +} + +static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, + uint32_t nvt_idx, XiveNVT *nvt, + uint8_t word_number) +{ + /* + * We don't need to write back to the NVTs because the sPAPR + * machine should never hit a non-scheduled NVT. It should never + * get called. 
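
The MMIO layout is fully determined by the property values: the source ESB pages start at vc-base, the END ESB pages are mapped immediately after them, and the TIMA sits at tm-base with four pages. A sketch of the address computation; the nr_irqs, esb_shift and tm_shift values below are illustrative assumptions (64K pages would mean a shift of 16), not values taken from the patch:

#include <inttypes.h>
#include <stdio.h>

#define SPAPR_XIVE_VC_BASE 0x0006010000000000ull
#define SPAPR_XIVE_TM_BASE 0x0006030203180000ull

int main(void)
{
    uint64_t vc_base   = SPAPR_XIVE_VC_BASE;
    unsigned nr_irqs   = 0x1000;     /* example value chosen by the machine */
    unsigned esb_shift = 16;         /* assumption: 64K ESB pages */
    unsigned tm_shift  = 16;         /* assumption: 64K TIMA pages */

    /* mirrors spapr_xive_realize(): END ESBs follow the source ESBs */
    uint64_t end_base = vc_base + ((uint64_t)1 << esb_shift) * nr_irqs;

    printf("source ESBs: 0x%016" PRIx64 "\n", vc_base);
    printf("END ESBs:    0x%016" PRIx64 "\n", end_base);
    printf("TIMA:        0x%016" PRIx64 " (%u bytes)\n",
           (uint64_t)SPAPR_XIVE_TM_BASE, 4u << tm_shift);
    return 0;
}
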
+ */ + g_assert_not_reached(); +} + +static const VMStateDescription vmstate_spapr_xive_end = { + .name = TYPE_SPAPR_XIVE "/end", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField []) { + VMSTATE_UINT32(w0, XiveEND), + VMSTATE_UINT32(w1, XiveEND), + VMSTATE_UINT32(w2, XiveEND), + VMSTATE_UINT32(w3, XiveEND), + VMSTATE_UINT32(w4, XiveEND), + VMSTATE_UINT32(w5, XiveEND), + VMSTATE_UINT32(w6, XiveEND), + VMSTATE_UINT32(w7, XiveEND), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr_xive_eas = { + .name = TYPE_SPAPR_XIVE "/eas", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField []) { + VMSTATE_UINT64(w, XiveEAS), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr_xive = { + .name = TYPE_SPAPR_XIVE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs, + vmstate_spapr_xive_eas, XiveEAS), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(endt, sPAPRXive, nr_ends, + vmstate_spapr_xive_end, XiveEND), + VMSTATE_END_OF_LIST() + }, +}; + +static Property spapr_xive_properties[] = { + DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0), + DEFINE_PROP_UINT32("nr-ends", sPAPRXive, nr_ends, 0), + DEFINE_PROP_UINT64("vc-base", sPAPRXive, vc_base, SPAPR_XIVE_VC_BASE), + DEFINE_PROP_UINT64("tm-base", sPAPRXive, tm_base, SPAPR_XIVE_TM_BASE), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_xive_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass); + + dc->desc = "sPAPR XIVE Interrupt Controller"; + dc->props = spapr_xive_properties; + dc->realize = spapr_xive_realize; + dc->vmsd = &vmstate_spapr_xive; + + xrc->get_eas = spapr_xive_get_eas; + xrc->get_end = spapr_xive_get_end; + xrc->write_end = spapr_xive_write_end; + xrc->get_nvt = spapr_xive_get_nvt; + xrc->write_nvt = spapr_xive_write_nvt; +} + +static const TypeInfo spapr_xive_info = { + .name = TYPE_SPAPR_XIVE, + .parent = TYPE_XIVE_ROUTER, + .instance_init = spapr_xive_instance_init, + .instance_size = sizeof(sPAPRXive), + .class_init = spapr_xive_class_init, +}; + +static void spapr_xive_register_types(void) +{ + type_register_static(&spapr_xive_info); +} + +type_init(spapr_xive_register_types) + +bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi) +{ + XiveSource *xsrc = &xive->source; + + if (lisn >= xive->nr_irqs) { + return false; + } + + xive->eat[lisn].w |= cpu_to_be64(EAS_VALID); + xive_source_irq_set(xsrc, lisn, lsi); + return true; +} + +bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn) +{ + XiveSource *xsrc = &xive->source; + + if (lisn >= xive->nr_irqs) { + return false; + } + + xive->eat[lisn].w &= cpu_to_be64(~EAS_VALID); + xive_source_irq_set(xsrc, lisn, false); + return true; +} + +qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn) +{ + XiveSource *xsrc = &xive->source; + + if (lisn >= xive->nr_irqs) { + return NULL; + } + + /* The sPAPR machine/device should have claimed the IRQ before */ + assert(xive_eas_is_valid(&xive->eat[lisn])); + + return xive_source_qirq(xsrc, lisn); +} + +/* + * XIVE hcalls + * + * The terminology used by the XIVE hcalls is the following : + * + * TARGET vCPU number + * EQ Event Queue assigned by OS to receive event data + * ESB page for source interrupt management + * LISN Logical Interrupt Source Number identifying a source in the + * machine + * EISN Effective 
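
spapr_xive_irq_claim() and spapr_xive_irq_free() are the machine-facing allocation interface: a LISN is only usable once its EAS has the valid bit set, and spapr_xive_qirq() asserts exactly that. A minimal model of the lifecycle; EAS_VALID's actual bit position comes from xive_regs.h and is treated as an opaque stand-in flag here:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_IRQS   64
#define EAS_VALID (1u << 0)      /* stand-in flag, not the real definition */

static uint32_t eat[NR_IRQS];    /* stand-in for the EAS table */

static bool irq_claim(uint32_t lisn)
{
    if (lisn >= NR_IRQS) {
        return false;
    }
    eat[lisn] |= EAS_VALID;
    return true;
}

static bool irq_free(uint32_t lisn)
{
    if (lisn >= NR_IRQS) {
        return false;
    }
    eat[lisn] &= ~EAS_VALID;
    return true;
}

int main(void)
{
    assert(irq_claim(12));
    assert(eat[12] & EAS_VALID);     /* qirq() would now hand out the IRQ */
    assert(irq_free(12));
    assert(!(eat[12] & EAS_VALID));  /* qirq() would assert if asked for it */
    printf("claim/free lifecycle ok\n");
    return 0;
}
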
Interrupt Source Number used by guest OS to + * identify source in the guest + * + * The EAS, END, NVT structures are not exposed. + */ + +/* + * Linux hosts under OPAL reserve priority 7 for their own escalation + * interrupts (DD2.X POWER9). So we only allow the guest to use + * priorities [0..6]. + */ +static bool spapr_xive_priority_is_reserved(uint8_t priority) +{ + switch (priority) { + case 0 ... 6: + return false; + case 7: /* OPAL escalation queue */ + default: + return true; + } +} + +/* + * The H_INT_GET_SOURCE_INFO hcall() is used to obtain the logical + * real address of the MMIO page through which the Event State Buffer + * entry associated with the value of the "lisn" parameter is managed. + * + * Parameters: + * Input + * - R4: "flags" + * Bits 0-63 reserved + * - R5: "lisn" is per "interrupts", "interrupt-map", or + * "ibm,xive-lisn-ranges" properties, or as returned by the + * ibm,query-interrupt-source-number RTAS call, or as returned + * by the H_ALLOCATE_VAS_WINDOW hcall + * + * Output + * - R4: "flags" + * Bits 0-59: Reserved + * Bit 60: H_INT_ESB must be used for Event State Buffer + * management + * Bit 61: 1 == LSI 0 == MSI + * Bit 62: the full function page supports trigger + * Bit 63: Store EOI Supported + * - R5: Logical Real address of full function Event State Buffer + * management page, -1 if H_INT_ESB hcall flag is set to 1. + * - R6: Logical Real Address of trigger only Event State Buffer + * management page or -1. + * - R7: Power of 2 page size for the ESB management pages returned in + * R5 and R6. + */ + +#define SPAPR_XIVE_SRC_H_INT_ESB PPC_BIT(60) /* ESB manage with H_INT_ESB */ +#define SPAPR_XIVE_SRC_LSI PPC_BIT(61) /* Virtual LSI type */ +#define SPAPR_XIVE_SRC_TRIGGER PPC_BIT(62) /* Trigger and management + on same page */ +#define SPAPR_XIVE_SRC_STORE_EOI PPC_BIT(63) /* Store EOI support */ + +static target_ulong h_int_get_source_info(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + XiveSource *xsrc = &xive->source; + target_ulong flags = args[0]; + target_ulong lisn = args[1]; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags) { + return H_PARAMETER; + } + + if (lisn >= xive->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + if (!xive_eas_is_valid(&xive->eat[lisn])) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + /* + * All sources are emulated under the main XIVE object and share + * the same characteristics. + */ + args[0] = 0; + if (!xive_source_esb_has_2page(xsrc)) { + args[0] |= SPAPR_XIVE_SRC_TRIGGER; + } + if (xsrc->esb_flags & XIVE_SRC_STORE_EOI) { + args[0] |= SPAPR_XIVE_SRC_STORE_EOI; + } + + /* + * Force the use of the H_INT_ESB hcall in case of an LSI + * interrupt. 
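
Note the bit numbering: the hcall documentation counts bits the IBM way, with bit 0 as the most significant, which is why "Bit 63: Store EOI Supported" maps to PPC_BIT(63), the least significant bit of the 64-bit flags word. Assuming PPC_BIT(n) expands to a mask with only bit (63 - n) of conventional LSB-0 numbering set, which is consistent with the flag layout documented above, the values work out as follows:

#include <stdio.h>

/* Assumed definition, matching IBM MSB-0 numbering used in the hcall docs. */
#define PPC_BIT(bit) (0x8000000000000000ULL >> (bit))

int main(void)
{
    printf("H_INT_ESB flag (bit 60) = 0x%016llx\n",
           (unsigned long long)PPC_BIT(60));    /* 0x8 */
    printf("LSI flag       (bit 61) = 0x%016llx\n",
           (unsigned long long)PPC_BIT(61));    /* 0x4 */
    printf("TRIGGER flag   (bit 62) = 0x%016llx\n",
           (unsigned long long)PPC_BIT(62));    /* 0x2 */
    printf("STORE_EOI flag (bit 63) = 0x%016llx\n",
           (unsigned long long)PPC_BIT(63));    /* 0x1 */
    return 0;
}
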
This is necessary under KVM to re-trigger the + * interrupt if the level is still asserted + */ + if (xive_source_irq_is_lsi(xsrc, lisn)) { + args[0] |= SPAPR_XIVE_SRC_H_INT_ESB | SPAPR_XIVE_SRC_LSI; + } + + if (!(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) { + args[1] = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn); + } else { + args[1] = -1; + } + + if (xive_source_esb_has_2page(xsrc) && + !(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) { + args[2] = xive->vc_base + xive_source_esb_page(xsrc, lisn); + } else { + args[2] = -1; + } + + if (xive_source_esb_has_2page(xsrc)) { + args[3] = xsrc->esb_shift - 1; + } else { + args[3] = xsrc->esb_shift; + } + + return H_SUCCESS; +} + +/* + * The H_INT_SET_SOURCE_CONFIG hcall() is used to assign a Logical + * Interrupt Source to a target. The Logical Interrupt Source is + * designated with the "lisn" parameter and the target is designated + * with the "target" and "priority" parameters. Upon return from the + * hcall(), no additional interrupts will be directed to the old EQ. + * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-61: Reserved + * Bit 62: set the "eisn" in the EAS + * Bit 63: masks the interrupt source in the hardware interrupt + * control structure. An interrupt masked by this mechanism will + * be dropped, but it's source state bits will still be + * set. There is no race-free way of unmasking and restoring the + * source. Thus this should only be used in interrupts that are + * also masked at the source, and only in cases where the + * interrupt is not meant to be used for a large amount of time + * because no valid target exists for it for example + * - R5: "lisn" is per "interrupts", "interrupt-map", or + * "ibm,xive-lisn-ranges" properties, or as returned by the + * ibm,query-interrupt-source-number RTAS call, or as returned by + * the H_ALLOCATE_VAS_WINDOW hcall + * - R6: "target" is per "ibm,ppc-interrupt-server#s" or + * "ibm,ppc-interrupt-gserver#s" + * - R7: "priority" is a valid priority not in + * "ibm,plat-res-int-priorities" + * - R8: "eisn" is the guest EISN associated with the "lisn" + * + * Output: + * - None + */ + +#define SPAPR_XIVE_SRC_SET_EISN PPC_BIT(62) +#define SPAPR_XIVE_SRC_MASK PPC_BIT(63) + +static target_ulong h_int_set_source_config(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + XiveEAS eas, new_eas; + target_ulong flags = args[0]; + target_ulong lisn = args[1]; + target_ulong target = args[2]; + target_ulong priority = args[3]; + target_ulong eisn = args[4]; + uint8_t end_blk; + uint32_t end_idx; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags & ~(SPAPR_XIVE_SRC_SET_EISN | SPAPR_XIVE_SRC_MASK)) { + return H_PARAMETER; + } + + if (lisn >= xive->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + eas = xive->eat[lisn]; + if (!xive_eas_is_valid(&eas)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + /* priority 0xff is used to reset the EAS */ + if (priority == 0xff) { + new_eas.w = cpu_to_be64(EAS_VALID | EAS_MASKED); + goto out; + } + + if (flags & SPAPR_XIVE_SRC_MASK) { + new_eas.w = eas.w | cpu_to_be64(EAS_MASKED); + } else { + new_eas.w = eas.w & cpu_to_be64(~EAS_MASKED); + } + + if (spapr_xive_priority_is_reserved(priority)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld + " is reserved\n", priority); + return H_P4; + } + + /* + * 
Validate that "target" is part of the list of threads allocated + * to the partition. For that, find the END corresponding to the + * target. + */ + if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) { + return H_P3; + } + + new_eas.w = xive_set_field64(EAS_END_BLOCK, new_eas.w, end_blk); + new_eas.w = xive_set_field64(EAS_END_INDEX, new_eas.w, end_idx); + + if (flags & SPAPR_XIVE_SRC_SET_EISN) { + new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn); + } + +out: + xive->eat[lisn] = new_eas; + return H_SUCCESS; +} + +/* + * The H_INT_GET_SOURCE_CONFIG hcall() is used to determine to which + * target/priority pair is assigned to the specified Logical Interrupt + * Source. + * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-63 Reserved + * - R5: "lisn" is per "interrupts", "interrupt-map", or + * "ibm,xive-lisn-ranges" properties, or as returned by the + * ibm,query-interrupt-source-number RTAS call, or as + * returned by the H_ALLOCATE_VAS_WINDOW hcall + * + * Output: + * - R4: Target to which the specified Logical Interrupt Source is + * assigned + * - R5: Priority to which the specified Logical Interrupt Source is + * assigned + * - R6: EISN for the specified Logical Interrupt Source (this will be + * equivalent to the LISN if not changed by H_INT_SET_SOURCE_CONFIG) + */ +static target_ulong h_int_get_source_config(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + target_ulong flags = args[0]; + target_ulong lisn = args[1]; + XiveEAS eas; + XiveEND *end; + uint8_t nvt_blk; + uint32_t end_idx, nvt_idx; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags) { + return H_PARAMETER; + } + + if (lisn >= xive->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + eas = xive->eat[lisn]; + if (!xive_eas_is_valid(&eas)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + /* EAS_END_BLOCK is unused on sPAPR */ + end_idx = xive_get_field64(EAS_END_INDEX, eas.w); + + assert(end_idx < xive->nr_ends); + end = &xive->endt[end_idx]; + + nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); + nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6); + args[0] = spapr_xive_nvt_to_target(nvt_blk, nvt_idx); + + if (xive_eas_is_masked(&eas)) { + args[1] = 0xff; + } else { + args[1] = xive_get_field32(END_W7_F0_PRIORITY, end->w7); + } + + args[2] = xive_get_field64(EAS_END_DATA, eas.w); + + return H_SUCCESS; +} + +/* + * The H_INT_GET_QUEUE_INFO hcall() is used to get the logical real + * address of the notification management page associated with the + * specified target and priority. 
+ * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-63 Reserved + * - R5: "target" is per "ibm,ppc-interrupt-server#s" or + * "ibm,ppc-interrupt-gserver#s" + * - R6: "priority" is a valid priority not in + * "ibm,plat-res-int-priorities" + * + * Output: + * - R4: Logical real address of notification page + * - R5: Power of 2 page size of the notification page + */ +static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + XiveENDSource *end_xsrc = &xive->end_source; + target_ulong flags = args[0]; + target_ulong target = args[1]; + target_ulong priority = args[2]; + XiveEND *end; + uint8_t end_blk; + uint32_t end_idx; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags) { + return H_PARAMETER; + } + + /* + * H_STATE should be returned if a H_INT_RESET is in progress. + * This is not needed when running the emulation under QEMU + */ + + if (spapr_xive_priority_is_reserved(priority)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld + " is reserved\n", priority); + return H_P3; + } + + /* + * Validate that "target" is part of the list of threads allocated + * to the partition. For that, find the END corresponding to the + * target. + */ + if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) { + return H_P2; + } + + assert(end_idx < xive->nr_ends); + end = &xive->endt[end_idx]; + + args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; + if (xive_end_is_enqueue(end)) { + args[1] = xive_get_field32(END_W0_QSIZE, end->w0) + 12; + } else { + args[1] = 0; + } + + return H_SUCCESS; +} + +/* + * The H_INT_SET_QUEUE_CONFIG hcall() is used to set or reset a EQ for + * a given "target" and "priority". It is also used to set the + * notification config associated with the EQ. An EQ size of 0 is + * used to reset the EQ config for a given target and priority. If + * resetting the EQ config, the END associated with the given "target" + * and "priority" will be changed to disable queueing. + * + * Upon return from the hcall(), no additional interrupts will be + * directed to the old EQ (if one was set). The old EQ (if one was + * set) should be investigated for interrupts that occurred prior to + * or during the hcall(). + * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-62: Reserved + * Bit 63: Unconditional Notify (n) per the XIVE spec + * - R5: "target" is per "ibm,ppc-interrupt-server#s" or + * "ibm,ppc-interrupt-gserver#s" + * - R6: "priority" is a valid priority not in + * "ibm,plat-res-int-priorities" + * - R7: "eventQueue": The logical real address of the start of the EQ + * - R8: "eventQueueSize": The power of 2 EQ size per "ibm,xive-eq-sizes" + * + * Output: + * - None + */ + +#define SPAPR_XIVE_END_ALWAYS_NOTIFY PPC_BIT(63) + +static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + target_ulong flags = args[0]; + target_ulong target = args[1]; + target_ulong priority = args[2]; + target_ulong qpage = args[3]; + target_ulong qsize = args[4]; + XiveEND end; + uint8_t end_blk, nvt_blk; + uint32_t end_idx, nvt_idx; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags & ~SPAPR_XIVE_END_ALWAYS_NOTIFY) { + return H_PARAMETER; + } + + /* + * H_STATE should be returned if a H_INT_RESET is in progress. 
+ * This is not needed when running the emulation under QEMU + */ + + if (spapr_xive_priority_is_reserved(priority)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld + " is reserved\n", priority); + return H_P3; + } + + /* + * Validate that "target" is part of the list of threads allocated + * to the partition. For that, find the END corresponding to the + * target. + */ + + if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) { + return H_P2; + } + + assert(end_idx < xive->nr_ends); + memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); + + switch (qsize) { + case 12: + case 16: + case 21: + case 24: + end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff); + end.w3 = cpu_to_be32(qpage & 0xffffffff); + end.w0 |= cpu_to_be32(END_W0_ENQUEUE); + end.w0 = xive_set_field32(END_W0_QSIZE, end.w0, qsize - 12); + break; + case 0: + /* reset queue and disable queueing */ + spapr_xive_end_reset(&end); + goto out; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EQ size %"PRIx64"\n", + qsize); + return H_P5; + } + + if (qsize) { + hwaddr plen = 1 << qsize; + void *eq; + + /* + * Validate the guest EQ. We should also check that the queue + * has been zeroed by the OS. + */ + eq = address_space_map(CPU(cpu)->as, qpage, &plen, true, + MEMTXATTRS_UNSPECIFIED); + if (plen != 1 << qsize) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to map EQ @0x%" + HWADDR_PRIx "\n", qpage); + return H_P4; + } + address_space_unmap(CPU(cpu)->as, eq, plen, true, plen); + } + + /* "target" should have been validated above */ + if (spapr_xive_target_to_nvt(target, &nvt_blk, &nvt_idx)) { + g_assert_not_reached(); + } + + /* + * Ensure the priority and target are correctly set (they will not + * be right after allocation) + */ + end.w6 = xive_set_field32(END_W6_NVT_BLOCK, 0ul, nvt_blk) | + xive_set_field32(END_W6_NVT_INDEX, 0ul, nvt_idx); + end.w7 = xive_set_field32(END_W7_F0_PRIORITY, 0ul, priority); + + if (flags & SPAPR_XIVE_END_ALWAYS_NOTIFY) { + end.w0 |= cpu_to_be32(END_W0_UCOND_NOTIFY); + } else { + end.w0 &= cpu_to_be32((uint32_t)~END_W0_UCOND_NOTIFY); + } + + /* + * The generation bit for the END starts at 1 and The END page + * offset counter starts at 0. + */ + end.w1 = cpu_to_be32(END_W1_GENERATION) | + xive_set_field32(END_W1_PAGE_OFF, 0ul, 0ul); + end.w0 |= cpu_to_be32(END_W0_VALID); + + /* + * TODO: issue syncs required to ensure all in-flight interrupts + * are complete on the old END + */ + +out: + /* Update END */ + memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND)); + return H_SUCCESS; +} + +/* + * The H_INT_GET_QUEUE_CONFIG hcall() is used to get a EQ for a given + * target and priority. 
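
The queue sizes accepted by H_INT_SET_QUEUE_CONFIG above correspond to the page sizes a guest may use for its EQ: 2^12, 2^16, 2^21 and 2^24 bytes. Only the excess over 12 is stored in END_W0_QSIZE, and the monitor code earlier derives the entry count as 1 << (QSIZE + 10), which is consistent with 4-byte event queue entries. A quick worked check of that accounting:

#include <stdio.h>

int main(void)
{
    int qsizes[] = { 12, 16, 21, 24 };   /* values H_INT_SET_QUEUE_CONFIG accepts */

    for (unsigned i = 0; i < sizeof(qsizes) / sizeof(qsizes[0]); i++) {
        int qsize    = qsizes[i];
        int w0_field = qsize - 12;                       /* lands in END_W0_QSIZE */
        unsigned long bytes   = 1ul << qsize;            /* EQ size in bytes */
        unsigned long entries = 1ul << (w0_field + 10);  /* 4-byte entries */

        printf("qsize %2d -> field %d, %8lu bytes, %8lu entries\n",
               qsize, w0_field, bytes, entries);
    }
    return 0;
}
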
+ * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-62: Reserved + * Bit 63: Debug: Return debug data + * - R5: "target" is per "ibm,ppc-interrupt-server#s" or + * "ibm,ppc-interrupt-gserver#s" + * - R6: "priority" is a valid priority not in + * "ibm,plat-res-int-priorities" + * + * Output: + * - R4: "flags": + * Bits 0-61: Reserved + * Bit 62: The value of Event Queue Generation Number (g) per + * the XIVE spec if "Debug" = 1 + * Bit 63: The value of Unconditional Notify (n) per the XIVE spec + * - R5: The logical real address of the start of the EQ + * - R6: The power of 2 EQ size per "ibm,xive-eq-sizes" + * - R7: The value of Event Queue Offset Counter per XIVE spec + * if "Debug" = 1, else 0 + * + */ + +#define SPAPR_XIVE_END_DEBUG PPC_BIT(63) + +static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + target_ulong flags = args[0]; + target_ulong target = args[1]; + target_ulong priority = args[2]; + XiveEND *end; + uint8_t end_blk; + uint32_t end_idx; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags & ~SPAPR_XIVE_END_DEBUG) { + return H_PARAMETER; + } + + /* + * H_STATE should be returned if a H_INT_RESET is in progress. + * This is not needed when running the emulation under QEMU + */ + + if (spapr_xive_priority_is_reserved(priority)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld + " is reserved\n", priority); + return H_P3; + } + + /* + * Validate that "target" is part of the list of threads allocated + * to the partition. For that, find the END corresponding to the + * target. + */ + if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) { + return H_P2; + } + + assert(end_idx < xive->nr_ends); + end = &xive->endt[end_idx]; + + args[0] = 0; + if (xive_end_is_notify(end)) { + args[0] |= SPAPR_XIVE_END_ALWAYS_NOTIFY; + } + + if (xive_end_is_enqueue(end)) { + args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 + | be32_to_cpu(end->w3); + args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12; + } else { + args[1] = 0; + args[2] = 0; + } + + /* TODO: do we need any locking on the END ? */ + if (flags & SPAPR_XIVE_END_DEBUG) { + /* Load the event queue generation number into the return flags */ + args[0] |= (uint64_t)xive_get_field32(END_W1_GENERATION, end->w1) << 62; + + /* Load R7 with the event queue offset counter */ + args[3] = xive_get_field32(END_W1_PAGE_OFF, end->w1); + } else { + args[3] = 0; + } + + return H_SUCCESS; +} + +/* + * The H_INT_SET_OS_REPORTING_LINE hcall() is used to set the + * reporting cache line pair for the calling thread. The reporting + * cache lines will contain the OS interrupt context when the OS + * issues a CI store byte to @TIMA+0xC10 to acknowledge the OS + * interrupt. The reporting cache lines can be reset by inputting -1 + * in "reportingLine". Issuing the CI store byte without reporting + * cache lines registered will result in the data not being accessible + * to the OS. 
+ * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-63: Reserved + * - R5: "reportingLine": The logical real address of the reporting cache + * line pair + * + * Output: + * - None + */ +static target_ulong h_int_set_os_reporting_line(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + /* + * H_STATE should be returned if a H_INT_RESET is in progress. + * This is not needed when running the emulation under QEMU + */ + + /* TODO: H_INT_SET_OS_REPORTING_LINE */ + return H_FUNCTION; +} + +/* + * The H_INT_GET_OS_REPORTING_LINE hcall() is used to get the logical + * real address of the reporting cache line pair set for the input + * "target". If no reporting cache line pair has been set, -1 is + * returned. + * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-63: Reserved + * - R5: "target" is per "ibm,ppc-interrupt-server#s" or + * "ibm,ppc-interrupt-gserver#s" + * - R6: "reportingLine": The logical real address of the reporting + * cache line pair + * + * Output: + * - R4: The logical real address of the reporting line if set, else -1 + */ +static target_ulong h_int_get_os_reporting_line(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + /* + * H_STATE should be returned if a H_INT_RESET is in progress. + * This is not needed when running the emulation under QEMU + */ + + /* TODO: H_INT_GET_OS_REPORTING_LINE */ + return H_FUNCTION; +} + +/* + * The H_INT_ESB hcall() is used to issue a load or store to the ESB + * page for the input "lisn". This hcall is only supported for LISNs + * that have the ESB hcall flag set to 1 when returned from hcall() + * H_INT_GET_SOURCE_INFO. 
+ * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-62: Reserved + * bit 63: Store: Store=1, store operation, else load operation + * - R5: "lisn" is per "interrupts", "interrupt-map", or + * "ibm,xive-lisn-ranges" properties, or as returned by the + * ibm,query-interrupt-source-number RTAS call, or as + * returned by the H_ALLOCATE_VAS_WINDOW hcall + * - R6: "esbOffset" is the offset into the ESB page for the load or + * store operation + * - R7: "storeData" is the data to write for a store operation + * + * Output: + * - R4: The value of the load if load operation, else -1 + */ + +#define SPAPR_XIVE_ESB_STORE PPC_BIT(63) + +static target_ulong h_int_esb(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + XiveEAS eas; + target_ulong flags = args[0]; + target_ulong lisn = args[1]; + target_ulong offset = args[2]; + target_ulong data = args[3]; + hwaddr mmio_addr; + XiveSource *xsrc = &xive->source; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags & ~SPAPR_XIVE_ESB_STORE) { + return H_PARAMETER; + } + + if (lisn >= xive->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + eas = xive->eat[lisn]; + if (!xive_eas_is_valid(&eas)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + if (offset > (1ull << xsrc->esb_shift)) { + return H_P3; + } + + mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset; + + if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8, + (flags & SPAPR_XIVE_ESB_STORE))) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%" + HWADDR_PRIx "\n", mmio_addr); + return H_HARDWARE; + } + args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data; + return H_SUCCESS; +} + +/* + * The H_INT_SYNC hcall() is used to issue hardware syncs that will + * ensure any in flight events for the input lisn are in the event + * queue. + * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-63: Reserved + * - R5: "lisn" is per "interrupts", "interrupt-map", or + * "ibm,xive-lisn-ranges" properties, or as returned by the + * ibm,query-interrupt-source-number RTAS call, or as + * returned by the H_ALLOCATE_VAS_WINDOW hcall + * + * Output: + * - None + */ +static target_ulong h_int_sync(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + XiveEAS eas; + target_ulong flags = args[0]; + target_ulong lisn = args[1]; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags) { + return H_PARAMETER; + } + + if (lisn >= xive->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + eas = xive->eat[lisn]; + if (!xive_eas_is_valid(&eas)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n", + lisn); + return H_P2; + } + + /* + * H_STATE should be returned if a H_INT_RESET is in progress. + * This is not needed when running the emulation under QEMU + */ + + /* This is not real hardware. Nothing to be done */ + return H_SUCCESS; +} + +/* + * The H_INT_RESET hcall() is used to reset all of the partition's + * interrupt exploitation structures to their initial state. This + * means losing all previously set interrupt state set via + * H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG. 
+ * + * Parameters: + * Input: + * - R4: "flags" + * Bits 0-63: Reserved + * + * Output: + * - None + */ +static target_ulong h_int_reset(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + sPAPRXive *xive = spapr->xive; + target_ulong flags = args[0]; + + if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + return H_FUNCTION; + } + + if (flags) { + return H_PARAMETER; + } + + device_reset(DEVICE(xive)); + return H_SUCCESS; +} + +void spapr_xive_hcall_init(sPAPRMachineState *spapr) +{ + spapr_register_hypercall(H_INT_GET_SOURCE_INFO, h_int_get_source_info); + spapr_register_hypercall(H_INT_SET_SOURCE_CONFIG, h_int_set_source_config); + spapr_register_hypercall(H_INT_GET_SOURCE_CONFIG, h_int_get_source_config); + spapr_register_hypercall(H_INT_GET_QUEUE_INFO, h_int_get_queue_info); + spapr_register_hypercall(H_INT_SET_QUEUE_CONFIG, h_int_set_queue_config); + spapr_register_hypercall(H_INT_GET_QUEUE_CONFIG, h_int_get_queue_config); + spapr_register_hypercall(H_INT_SET_OS_REPORTING_LINE, + h_int_set_os_reporting_line); + spapr_register_hypercall(H_INT_GET_OS_REPORTING_LINE, + h_int_get_os_reporting_line); + spapr_register_hypercall(H_INT_ESB, h_int_esb); + spapr_register_hypercall(H_INT_SYNC, h_int_sync); + spapr_register_hypercall(H_INT_RESET, h_int_reset); +} + +void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, + uint32_t phandle) +{ + sPAPRXive *xive = spapr->xive; + int node; + uint64_t timas[2 * 2]; + /* Interrupt number ranges for the IPIs */ + uint32_t lisn_ranges[] = { + cpu_to_be32(0), + cpu_to_be32(nr_servers), + }; + /* + * EQ size - the sizes of pages supported by the system 4K, 64K, + * 2M, 16M. We only advertise 64K for the moment. + */ + uint32_t eq_sizes[] = { + cpu_to_be32(16), /* 64K */ + }; + /* + * The following array is in sync with the reserved priorities + * defined by the 'spapr_xive_priority_is_reserved' routine. + */ + uint32_t plat_res_int_priorities[] = { + cpu_to_be32(7), /* start */ + cpu_to_be32(0xf8), /* count */ + }; + gchar *nodename; + + /* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */ + timas[0] = cpu_to_be64(xive->tm_base + + XIVE_TM_USER_PAGE * (1ull << TM_SHIFT)); + timas[1] = cpu_to_be64(1ull << TM_SHIFT); + timas[2] = cpu_to_be64(xive->tm_base + + XIVE_TM_OS_PAGE * (1ull << TM_SHIFT)); + timas[3] = cpu_to_be64(1ull << TM_SHIFT); + + nodename = g_strdup_printf("interrupt-controller@%" PRIx64, + xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); + _FDT(node = fdt_add_subnode(fdt, 0, nodename)); + g_free(nodename); + + _FDT(fdt_setprop_string(fdt, node, "device_type", "power-ivpe")); + _FDT(fdt_setprop(fdt, node, "reg", timas, sizeof(timas))); + + _FDT(fdt_setprop_string(fdt, node, "compatible", "ibm,power-ivpe")); + _FDT(fdt_setprop(fdt, node, "ibm,xive-eq-sizes", eq_sizes, + sizeof(eq_sizes))); + _FDT(fdt_setprop(fdt, node, "ibm,xive-lisn-ranges", lisn_ranges, + sizeof(lisn_ranges))); + + /* For Linux to link the LSIs to the interrupt controller. 
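
The plat_res_int_priorities cells defined above, {7, 0xf8}, describe exactly the range that spapr_xive_priority_is_reserved() rejects: priorities 7 through 254 (7 + 0xf8 - 1), leaving 0 to 6 for the guest. A small consistency check between the helper and the device tree property:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* same logic as spapr_xive_priority_is_reserved() */
static bool priority_is_reserved(unsigned priority)
{
    return priority > 6;
}

int main(void)
{
    unsigned start = 7, count = 0xf8;   /* "ibm,plat-res-int-priorities" cells */

    for (unsigned prio = 0; prio <= 254; prio++) {
        bool in_dt_range = prio >= start && prio < start + count;
        assert(priority_is_reserved(prio) == in_dt_range);
    }
    /* 0xff is not covered by the property: it is the special "reset/mask"
     * value handled before the reserved-priority check in
     * H_INT_SET_SOURCE_CONFIG. */
    printf("priorities 0-6 usable, %u-%u reserved\n", start, start + count - 1);
    return 0;
}
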
*/ + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + + /* For SLOF */ + _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle)); + _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle)); + + /* + * The "ibm,plat-res-int-priorities" property defines the priority + * ranges reserved by the hypervisor + */ + _FDT(fdt_setprop(fdt, 0, "ibm,plat-res-int-priorities", + plat_res_int_priorities, sizeof(plat_res_int_priorities))); +} diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 2e27b92b87..f67d3c80bf 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -244,7 +244,8 @@ void xics_spapr_init(sPAPRMachineState *spapr) spapr_register_hypercall(H_IPOLL, h_ipoll); } -void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle) +void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, + uint32_t phandle) { uint32_t interrupt_server_ranges_prop[] = { 0, cpu_to_be32(nr_servers), diff --git a/hw/intc/xive.c b/hw/intc/xive.c new file mode 100644 index 0000000000..ea33494338 --- /dev/null +++ b/hw/intc/xive.c @@ -0,0 +1,1599 @@ +/* + * QEMU PowerPC XIVE interrupt controller model + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "target/ppc/cpu.h" +#include "sysemu/cpus.h" +#include "sysemu/dma.h" +#include "hw/qdev-properties.h" +#include "monitor/monitor.h" +#include "hw/ppc/xive.h" +#include "hw/ppc/xive_regs.h" + +/* + * XIVE Thread Interrupt Management context + */ + +/* + * Convert a priority number to an Interrupt Pending Buffer (IPB) + * register, which indicates a pending interrupt at the priority + * corresponding to the bit number + */ +static uint8_t priority_to_ipb(uint8_t priority) +{ + return priority > XIVE_PRIORITY_MAX ? + 0 : 1 << (XIVE_PRIORITY_MAX - priority); +} + +/* + * Convert an Interrupt Pending Buffer (IPB) register to a Pending + * Interrupt Priority Register (PIPR), which contains the priority of + * the most favored pending notification. + */ +static uint8_t ipb_to_pipr(uint8_t ibp) +{ + return ibp ? 
clz32((uint32_t)ibp << 24) : 0xff; +} + +static void ipb_update(uint8_t *regs, uint8_t priority) +{ + regs[TM_IPB] |= priority_to_ipb(priority); + regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]); +} + +static uint8_t exception_mask(uint8_t ring) +{ + switch (ring) { + case TM_QW1_OS: + return TM_QW1_NSR_EO; + default: + g_assert_not_reached(); + } +} + +static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) +{ + uint8_t *regs = &tctx->regs[ring]; + uint8_t nsr = regs[TM_NSR]; + uint8_t mask = exception_mask(ring); + + qemu_irq_lower(tctx->output); + + if (regs[TM_NSR] & mask) { + uint8_t cppr = regs[TM_PIPR]; + + regs[TM_CPPR] = cppr; + + /* Reset the pending buffer bit */ + regs[TM_IPB] &= ~priority_to_ipb(cppr); + regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]); + + /* Drop Exception bit */ + regs[TM_NSR] &= ~mask; + } + + return (nsr << 8) | regs[TM_CPPR]; +} + +static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring) +{ + uint8_t *regs = &tctx->regs[ring]; + + if (regs[TM_PIPR] < regs[TM_CPPR]) { + regs[TM_NSR] |= exception_mask(ring); + qemu_irq_raise(tctx->output); + } +} + +static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) +{ + if (cppr > XIVE_PRIORITY_MAX) { + cppr = 0xff; + } + + tctx->regs[ring + TM_CPPR] = cppr; + + /* CPPR has changed, check if we need to raise a pending exception */ + xive_tctx_notify(tctx, ring); +} + +/* + * XIVE Thread Interrupt Management Area (TIMA) + */ + +/* + * Define an access map for each page of the TIMA that we will use in + * the memory region ops to filter values when doing loads and stores + * of raw registers values + * + * Registers accessibility bits : + * + * 0x0 - no access + * 0x1 - write only + * 0x2 - read only + * 0x3 - read/write + */ + +static const uint8_t xive_tm_hw_view[] = { + /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, + /* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0, + /* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, + /* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 0, +}; + +static const uint8_t xive_tm_hv_view[] = { + /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, + /* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0, + /* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0, + /* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 0, 0, +}; + +static const uint8_t xive_tm_os_view[] = { + /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, + /* QW-1 OS */ 2, 3, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + /* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static const uint8_t xive_tm_user_view[] = { + /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* QW-1 OS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* + * Overall TIMA access map for the thread interrupt management context + * registers + */ +static const uint8_t *xive_tm_views[] = { + [XIVE_TM_HW_PAGE] = xive_tm_hw_view, + [XIVE_TM_HV_PAGE] = xive_tm_hv_view, + [XIVE_TM_OS_PAGE] = xive_tm_os_view, + [XIVE_TM_USER_PAGE] = xive_tm_user_view, +}; + +/* + * Computes a register access mask for a given offset in the TIMA + */ +static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write) +{ + uint8_t page_offset = (offset >> TM_SHIFT) & 0x3; + uint8_t reg_offset = offset & 0x3F; + uint8_t 
reg_mask = write ? 0x1 : 0x2; + uint64_t mask = 0x0; + int i; + + for (i = 0; i < size; i++) { + if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) { + mask |= (uint64_t) 0xff << (8 * (size - i - 1)); + } + } + + return mask; +} + +static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value, + unsigned size) +{ + uint8_t ring_offset = offset & 0x30; + uint8_t reg_offset = offset & 0x3F; + uint64_t mask = xive_tm_mask(offset, size, true); + int i; + + /* + * Only 4 or 8 bytes stores are allowed and the User ring is + * excluded + */ + if (size < 4 || !mask || ring_offset == TM_QW0_USER) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%" + HWADDR_PRIx"\n", offset); + return; + } + + /* + * Use the register offset for the raw values and filter out + * reserved values + */ + for (i = 0; i < size; i++) { + uint8_t byte_mask = (mask >> (8 * (size - i - 1))); + if (byte_mask) { + tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) & + byte_mask; + } + } +} + +static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size) +{ + uint8_t ring_offset = offset & 0x30; + uint8_t reg_offset = offset & 0x3F; + uint64_t mask = xive_tm_mask(offset, size, false); + uint64_t ret; + int i; + + /* + * Only 4 or 8 bytes loads are allowed and the User ring is + * excluded + */ + if (size < 4 || !mask || ring_offset == TM_QW0_USER) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%" + HWADDR_PRIx"\n", offset); + return -1; + } + + /* Use the register offset for the raw values */ + ret = 0; + for (i = 0; i < size; i++) { + ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1)); + } + + /* filter out reserved values */ + return ret & mask; +} + +/* + * The TM context is mapped twice within each page. Stores and loads + * to the first mapping below 2K write and read the specified values + * without modification. The second mapping above 2K performs specific + * state changes (side effects) in addition to setting/returning the + * interrupt management area context of the processor thread. + */ +static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size) +{ + return xive_tctx_accept(tctx, TM_QW1_OS); +} + +static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset, + uint64_t value, unsigned size) +{ + xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff); +} + +/* + * Adjust the IPB to allow a CPU to process event queues of other + * priorities during one physical interrupt cycle. + */ +static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset, + uint64_t value, unsigned size) +{ + ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff); + xive_tctx_notify(tctx, TM_QW1_OS); +} + +/* + * Define a mapping of "special" operations depending on the TIMA page + * offset and the size of the operation. 
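+ * Each XiveTmOp entry pairs a TIMA page, a register offset and an access
+ * size with the handler to run: for instance, a 1-byte store to
+ * TM_QW1_OS + TM_CPPR done from the OS page is routed to
+ * xive_tm_set_os_cppr().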
+ */ +typedef struct XiveTmOp { + uint8_t page_offset; + uint32_t op_offset; + unsigned size; + void (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value, + unsigned size); + uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size); +} XiveTmOp; + +static const XiveTmOp xive_tm_operations[] = { + /* + * MMIOs below 2K : raw values and special operations without side + * effects + */ + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL }, + + /* MMIOs above 2K : special operations with side effects */ + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL }, +}; + +static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write) +{ + uint8_t page_offset = (offset >> TM_SHIFT) & 0x3; + uint32_t op_offset = offset & 0xFFF; + int i; + + for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) { + const XiveTmOp *xto = &xive_tm_operations[i]; + + /* Accesses done from a more privileged TIMA page is allowed */ + if (xto->page_offset >= page_offset && + xto->op_offset == op_offset && + xto->size == size && + ((write && xto->write_handler) || (!write && xto->read_handler))) { + return xto; + } + } + return NULL; +} + +/* + * TIMA MMIO handlers + */ +static void xive_tm_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + XiveTCTX *tctx = XIVE_TCTX(cpu->intc); + const XiveTmOp *xto; + + /* + * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU + */ + + /* + * First, check for special operations in the 2K region + */ + if (offset & 0x800) { + xto = xive_tm_find_op(offset, size, true); + if (!xto) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA" + "@%"HWADDR_PRIx"\n", offset); + } else { + xto->write_handler(tctx, offset, value, size); + } + return; + } + + /* + * Then, for special operations in the region below 2K. + */ + xto = xive_tm_find_op(offset, size, true); + if (xto) { + xto->write_handler(tctx, offset, value, size); + return; + } + + /* + * Finish with raw access to the register values + */ + xive_tm_raw_write(tctx, offset, value, size); +} + +static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size) +{ + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + XiveTCTX *tctx = XIVE_TCTX(cpu->intc); + const XiveTmOp *xto; + + /* + * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU + */ + + /* + * First, check for special operations in the 2K region + */ + if (offset & 0x800) { + xto = xive_tm_find_op(offset, size, false); + if (!xto) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA" + "@%"HWADDR_PRIx"\n", offset); + return -1; + } + return xto->read_handler(tctx, offset, size); + } + + /* + * Then, for special operations in the region below 2K. 
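+ * With the current operation table no below-2K entry has a read handler,
+ * so such loads fall through to the raw register access below.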
+ */ + xto = xive_tm_find_op(offset, size, false); + if (xto) { + return xto->read_handler(tctx, offset, size); + } + + /* + * Finish with raw access to the register values + */ + return xive_tm_raw_read(tctx, offset, size); +} + +const MemoryRegionOps xive_tm_ops = { + .read = xive_tm_read, + .write = xive_tm_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static inline uint32_t xive_tctx_word2(uint8_t *ring) +{ + return *((uint32_t *) &ring[TM_WORD2]); +} + +static char *xive_tctx_ring_print(uint8_t *ring) +{ + uint32_t w2 = xive_tctx_word2(ring); + + return g_strdup_printf("%02x %02x %02x %02x %02x " + "%02x %02x %02x %08x", + ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB], + ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR], + be32_to_cpu(w2)); +} + +static const char * const xive_tctx_ring_names[] = { + "USER", "OS", "POOL", "PHYS", +}; + +void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) +{ + int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1; + int i; + + monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR" + " W2\n", cpu_index); + + for (i = 0; i < XIVE_TM_RING_COUNT; i++) { + char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]); + monitor_printf(mon, "CPU[%04x]: %4s %s\n", cpu_index, + xive_tctx_ring_names[i], s); + g_free(s); + } +} + +static void xive_tctx_reset(void *dev) +{ + XiveTCTX *tctx = XIVE_TCTX(dev); + + memset(tctx->regs, 0, sizeof(tctx->regs)); + + /* Set some defaults */ + tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF; + tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF; + tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF; + + /* + * Initialize PIPR to 0xFF to avoid phantom interrupts when the + * CPPR is first set. 
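+ * The IPB was cleared by the memset above, so ipb_to_pipr() returns
+ * 0xFF here.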
+ */ + tctx->regs[TM_QW1_OS + TM_PIPR] = + ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]); +} + +static void xive_tctx_realize(DeviceState *dev, Error **errp) +{ + XiveTCTX *tctx = XIVE_TCTX(dev); + PowerPCCPU *cpu; + CPUPPCState *env; + Object *obj; + Error *local_err = NULL; + + obj = object_property_get_link(OBJECT(dev), "cpu", &local_err); + if (!obj) { + error_propagate(errp, local_err); + error_prepend(errp, "required link 'cpu' not found: "); + return; + } + + cpu = POWERPC_CPU(obj); + tctx->cs = CPU(obj); + + env = &cpu->env; + switch (PPC_INPUT(env)) { + case PPC_FLAGS_INPUT_POWER7: + tctx->output = env->irq_inputs[POWER7_INPUT_INT]; + break; + + default: + error_setg(errp, "XIVE interrupt controller does not support " + "this CPU bus model"); + return; + } + + qemu_register_reset(xive_tctx_reset, dev); +} + +static void xive_tctx_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(xive_tctx_reset, dev); +} + +static const VMStateDescription vmstate_xive_tctx = { + .name = TYPE_XIVE_TCTX, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BUFFER(regs, XiveTCTX), + VMSTATE_END_OF_LIST() + }, +}; + +static void xive_tctx_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "XIVE Interrupt Thread Context"; + dc->realize = xive_tctx_realize; + dc->unrealize = xive_tctx_unrealize; + dc->vmsd = &vmstate_xive_tctx; +} + +static const TypeInfo xive_tctx_info = { + .name = TYPE_XIVE_TCTX, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiveTCTX), + .class_init = xive_tctx_class_init, +}; + +Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp) +{ + Error *local_err = NULL; + Object *obj; + + obj = object_new(TYPE_XIVE_TCTX); + object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort); + object_unref(obj); + object_property_add_const_link(obj, "cpu", cpu, &error_abort); + object_property_set_bool(obj, true, "realized", &local_err); + if (local_err) { + goto error; + } + + return obj; + +error: + object_unparent(obj); + error_propagate(errp, local_err); + return NULL; +} + +/* + * XIVE ESB helpers + */ + +static uint8_t xive_esb_set(uint8_t *pq, uint8_t value) +{ + uint8_t old_pq = *pq & 0x3; + + *pq &= ~0x3; + *pq |= value & 0x3; + + return old_pq; +} + +static bool xive_esb_trigger(uint8_t *pq) +{ + uint8_t old_pq = *pq & 0x3; + + switch (old_pq) { + case XIVE_ESB_RESET: + xive_esb_set(pq, XIVE_ESB_PENDING); + return true; + case XIVE_ESB_PENDING: + case XIVE_ESB_QUEUED: + xive_esb_set(pq, XIVE_ESB_QUEUED); + return false; + case XIVE_ESB_OFF: + xive_esb_set(pq, XIVE_ESB_OFF); + return false; + default: + g_assert_not_reached(); + } +} + +static bool xive_esb_eoi(uint8_t *pq) +{ + uint8_t old_pq = *pq & 0x3; + + switch (old_pq) { + case XIVE_ESB_RESET: + case XIVE_ESB_PENDING: + xive_esb_set(pq, XIVE_ESB_RESET); + return false; + case XIVE_ESB_QUEUED: + xive_esb_set(pq, XIVE_ESB_PENDING); + return true; + case XIVE_ESB_OFF: + xive_esb_set(pq, XIVE_ESB_OFF); + return false; + default: + g_assert_not_reached(); + } +} + +/* + * XIVE Interrupt Source (or IVSE) + */ + +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + + return xsrc->status[srcno] & 0x3; +} + +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq) +{ + assert(srcno < xsrc->nr_irqs); + + return xive_esb_set(&xsrc->status[srcno], pq); +} + +/* + * Returns whether the event notification should be forwarded. 
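+ * For an LSI, the assertion is latched in XIVE_STATUS_ASSERTED and the
+ * PQ bits only move from Reset to Pending; a trigger received in any
+ * other state is not forwarded.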
+ */ +static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno) +{ + uint8_t old_pq = xive_source_esb_get(xsrc, srcno); + + xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; + + switch (old_pq) { + case XIVE_ESB_RESET: + xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING); + return true; + default: + return false; + } +} + +/* + * Returns whether the event notification should be forwarded. + */ +static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno) +{ + bool ret; + + assert(srcno < xsrc->nr_irqs); + + ret = xive_esb_trigger(&xsrc->status[srcno]); + + if (xive_source_irq_is_lsi(xsrc, srcno) && + xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) { + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: queued an event on LSI IRQ %d\n", srcno); + } + + return ret; +} + +/* + * Returns whether the event notification should be forwarded. + */ +static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno) +{ + bool ret; + + assert(srcno < xsrc->nr_irqs); + + ret = xive_esb_eoi(&xsrc->status[srcno]); + + /* + * LSI sources do not set the Q bit but they can still be + * asserted, in which case we should forward a new event + * notification + */ + if (xive_source_irq_is_lsi(xsrc, srcno) && + xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + ret = xive_source_lsi_trigger(xsrc, srcno); + } + + return ret; +} + +/* + * Forward the source event notification to the Router + */ +static void xive_source_notify(XiveSource *xsrc, int srcno) +{ + XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive); + + if (xnc->notify) { + xnc->notify(xsrc->xive, srcno); + } +} + +/* + * In a two pages ESB MMIO setting, even page is the trigger page, odd + * page is for management + */ +static inline bool addr_is_even(hwaddr addr, uint32_t shift) +{ + return !((addr >> shift) & 1); +} + +static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr) +{ + return xive_source_esb_has_2page(xsrc) && + addr_is_even(addr, xsrc->esb_shift - 1); +} + +/* + * ESB MMIO loads + * Trigger page Management/EOI page + * + * ESB MMIO setting 2 pages 1 or 2 pages + * + * 0x000 .. 0x3FF -1 EOI and return 0|1 + * 0x400 .. 0x7FF -1 EOI and return 0|1 + * 0x800 .. 0xBFF -1 return PQ + * 0xC00 .. 0xCFF -1 return PQ and atomically PQ=00 + * 0xD00 .. 0xDFF -1 return PQ and atomically PQ=01 + * 0xE00 .. 0xDFF -1 return PQ and atomically PQ=10 + * 0xF00 .. 0xDFF -1 return PQ and atomically PQ=11 + */ +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size) +{ + XiveSource *xsrc = XIVE_SOURCE(opaque); + uint32_t offset = addr & 0xFFF; + uint32_t srcno = addr >> xsrc->esb_shift; + uint64_t ret = -1; + + /* In a two pages ESB MMIO setting, trigger page should not be read */ + if (xive_source_is_trigger_page(xsrc, addr)) { + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: invalid load on IRQ %d trigger page at " + "0x%"HWADDR_PRIx"\n", srcno, addr); + return -1; + } + + switch (offset) { + case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF: + ret = xive_source_esb_eoi(xsrc, srcno); + + /* Forward the source event notification for routing */ + if (ret) { + xive_source_notify(xsrc, srcno); + } + break; + + case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF: + ret = xive_source_esb_get(xsrc, srcno); + break; + + case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF: + case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF: + case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF: + case XIVE_ESB_SET_PQ_11 ... 
XIVE_ESB_SET_PQ_11 + 0x0FF: + ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n", + offset); + } + + return ret; +} + +/* + * ESB MMIO stores + * Trigger page Management/EOI page + * + * ESB MMIO setting 2 pages 1 or 2 pages + * + * 0x000 .. 0x3FF Trigger Trigger + * 0x400 .. 0x7FF Trigger EOI + * 0x800 .. 0xBFF Trigger undefined + * 0xC00 .. 0xCFF Trigger PQ=00 + * 0xD00 .. 0xDFF Trigger PQ=01 + * 0xE00 .. 0xDFF Trigger PQ=10 + * 0xF00 .. 0xDFF Trigger PQ=11 + */ +static void xive_source_esb_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + XiveSource *xsrc = XIVE_SOURCE(opaque); + uint32_t offset = addr & 0xFFF; + uint32_t srcno = addr >> xsrc->esb_shift; + bool notify = false; + + /* In a two pages ESB MMIO setting, trigger page only triggers */ + if (xive_source_is_trigger_page(xsrc, addr)) { + notify = xive_source_esb_trigger(xsrc, srcno); + goto out; + } + + switch (offset) { + case 0 ... 0x3FF: + notify = xive_source_esb_trigger(xsrc, srcno); + break; + + case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF: + if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) { + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: invalid Store EOI for IRQ %d\n", srcno); + return; + } + + notify = xive_source_esb_eoi(xsrc, srcno); + break; + + case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF: + case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF: + case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF: + case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF: + xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n", + offset); + return; + } + +out: + /* Forward the source event notification for routing */ + if (notify) { + xive_source_notify(xsrc, srcno); + } +} + +static const MemoryRegionOps xive_source_esb_ops = { + .read = xive_source_esb_read, + .write = xive_source_esb_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static void xive_source_set_irq(void *opaque, int srcno, int val) +{ + XiveSource *xsrc = XIVE_SOURCE(opaque); + bool notify = false; + + if (xive_source_irq_is_lsi(xsrc, srcno)) { + if (val) { + notify = xive_source_lsi_trigger(xsrc, srcno); + } else { + xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; + } + } else { + if (val) { + notify = xive_source_esb_trigger(xsrc, srcno); + } + } + + /* Forward the source event notification for routing */ + if (notify) { + xive_source_notify(xsrc, srcno); + } +} + +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon) +{ + int i; + + for (i = 0; i < xsrc->nr_irqs; i++) { + uint8_t pq = xive_source_esb_get(xsrc, i); + + if (pq == XIVE_ESB_OFF) { + continue; + } + + monitor_printf(mon, " %08x %s %c%c%c\n", i + offset, + xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", + pq & XIVE_ESB_VAL_P ? 'P' : '-', + pq & XIVE_ESB_VAL_Q ? 'Q' : '-', + xsrc->status[i] & XIVE_STATUS_ASSERTED ? 
'A' : ' '); + } +} + +static void xive_source_reset(void *dev) +{ + XiveSource *xsrc = XIVE_SOURCE(dev); + + /* Do not clear the LSI bitmap */ + + /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */ + memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs); +} + +static void xive_source_realize(DeviceState *dev, Error **errp) +{ + XiveSource *xsrc = XIVE_SOURCE(dev); + Object *obj; + Error *local_err = NULL; + + obj = object_property_get_link(OBJECT(dev), "xive", &local_err); + if (!obj) { + error_propagate(errp, local_err); + error_prepend(errp, "required link 'xive' not found: "); + return; + } + + xsrc->xive = XIVE_NOTIFIER(obj); + + if (!xsrc->nr_irqs) { + error_setg(errp, "Number of interrupt needs to be greater than 0"); + return; + } + + if (xsrc->esb_shift != XIVE_ESB_4K && + xsrc->esb_shift != XIVE_ESB_4K_2PAGE && + xsrc->esb_shift != XIVE_ESB_64K && + xsrc->esb_shift != XIVE_ESB_64K_2PAGE) { + error_setg(errp, "Invalid ESB shift setting"); + return; + } + + xsrc->status = g_malloc0(xsrc->nr_irqs); + xsrc->lsi_map = bitmap_new(xsrc->nr_irqs); + + memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), + &xive_source_esb_ops, xsrc, "xive.esb", + (1ull << xsrc->esb_shift) * xsrc->nr_irqs); + + xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, + xsrc->nr_irqs); + + qemu_register_reset(xive_source_reset, dev); +} + +static const VMStateDescription vmstate_xive_source = { + .name = TYPE_XIVE_SOURCE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL), + VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs), + VMSTATE_END_OF_LIST() + }, +}; + +/* + * The default XIVE interrupt source setting for the ESB MMIOs is two + * 64k pages without Store EOI, to be in sync with KVM. + */ +static Property xive_source_properties[] = { + DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0), + DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0), + DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xive_source_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "XIVE Interrupt Source"; + dc->props = xive_source_properties; + dc->realize = xive_source_realize; + dc->vmsd = &vmstate_xive_source; +} + +static const TypeInfo xive_source_info = { + .name = TYPE_XIVE_SOURCE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiveSource), + .class_init = xive_source_class_init, +}; + +/* + * XiveEND helpers + */ + +void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon) +{ + uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 + | be32_to_cpu(end->w3); + uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); + uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); + uint32_t qentries = 1 << (qsize + 10); + int i; + + /* + * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window + */ + monitor_printf(mon, " [ "); + qindex = (qindex - (width - 1)) & (qentries - 1); + for (i = 0; i < width; i++) { + uint64_t qaddr = qaddr_base + (qindex << 2); + uint32_t qdata = -1; + + if (dma_memory_read(&address_space_memory, qaddr, &qdata, + sizeof(qdata))) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%" + HWADDR_PRIx "\n", qaddr); + return; + } + monitor_printf(mon, "%s%08x ", i == width - 1 ? 
"^" : "", + be32_to_cpu(qdata)); + qindex = (qindex + 1) & (qentries - 1); + } +} + +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) +{ + uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 + | be32_to_cpu(end->w3); + uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); + uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1); + uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); + uint32_t qentries = 1 << (qsize + 10); + + uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6); + uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7); + + if (!xive_end_is_valid(end)) { + return; + } + + monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64 + "% 6d/%5d ^%d", end_idx, + xive_end_is_valid(end) ? 'v' : '-', + xive_end_is_enqueue(end) ? 'q' : '-', + xive_end_is_notify(end) ? 'n' : '-', + xive_end_is_backlog(end) ? 'b' : '-', + xive_end_is_escalate(end) ? 'e' : '-', + priority, nvt, qaddr_base, qindex, qentries, qgen); + + xive_end_queue_pic_print_info(end, 6, mon); + monitor_printf(mon, "]\n"); +} + +static void xive_end_enqueue(XiveEND *end, uint32_t data) +{ + uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 + | be32_to_cpu(end->w3); + uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); + uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); + uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1); + + uint64_t qaddr = qaddr_base + (qindex << 2); + uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); + uint32_t qentries = 1 << (qsize + 10); + + if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%" + HWADDR_PRIx "\n", qaddr); + return; + } + + qindex = (qindex + 1) & (qentries - 1); + if (qindex == 0) { + qgen ^= 1; + end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen); + } + end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex); +} + +/* + * XIVE Router (aka. Virtualization Controller or IVRE) + */ + +int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, + XiveEAS *eas) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->get_eas(xrtr, eas_blk, eas_idx, eas); +} + +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->get_end(xrtr, end_blk, end_idx, end); +} + +int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end, uint8_t word_number) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->write_end(xrtr, end_blk, end_idx, end, word_number); +} + +int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt); +} + +int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt, uint8_t word_number) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number); +} + +/* + * The thread context register words are in big-endian format. 
+ */ +static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint32_t logic_serv) +{ + uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx); + uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]); + uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]); + uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]); + + /* + * TODO (PowerNV): ignore mode. The low order bits of the NVT + * identifier are ignored in the "CAM" match. + */ + + if (format == 0) { + if (cam_ignore == true) { + /* + * F=0 & i=1: Logical server notification (bits ignored at + * the end of the NVT identifier) + */ + qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n", + nvt_blk, nvt_idx); + return -1; + } + + /* F=0 & i=0: Specific NVT notification */ + + /* TODO (PowerNV) : PHYS ring */ + + /* HV POOL ring */ + if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) && + cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) { + return TM_QW2_HV_POOL; + } + + /* OS ring */ + if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) && + cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) { + return TM_QW1_OS; + } + } else { + /* F=1 : User level Event-Based Branch (EBB) notification */ + + /* USER ring */ + if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) && + (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) && + (be32_to_cpu(qw0w2) & TM_QW0W2_VU) && + (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) { + return TM_QW0_USER; + } + } + return -1; +} + +typedef struct XiveTCTXMatch { + XiveTCTX *tctx; + uint8_t ring; +} XiveTCTXMatch; + +static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) +{ + CPUState *cs; + + /* + * TODO (PowerNV): handle chip_id overwrite of block field for + * hardwired CAM compares + */ + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + XiveTCTX *tctx = XIVE_TCTX(cpu->intc); + int ring; + + /* + * HW checks that the CPU is enabled in the Physical Thread + * Enable Register (PTER). + */ + + /* + * Check the thread context CAM lines and record matches. We + * will handle CPU exception delivery later + */ + ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx, + cam_ignore, logic_serv); + /* + * Save the context and follow on to catch duplicates, that we + * don't support yet. + */ + if (ring != -1) { + if (match->tctx) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread " + "context NVT %x/%x\n", nvt_blk, nvt_idx); + return false; + } + + match->ring = ring; + match->tctx = tctx; + } + } + + if (!match->tctx) { + qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n", + nvt_blk, nvt_idx); + return false; + } + + return true; +} + +/* + * This is our simple Xive Presenter Engine model. It is merged in the + * Router as it does not require an extra object. + * + * It receives notification requests sent by the IVRE to find one + * matching NVT (or more) dispatched on the processor threads. In case + * of a single NVT notification, the process is abreviated and the + * thread is signaled if a match is found. In case of a logical server + * notification (bits ignored at the end of the NVT identifier), the + * IVPE and IVRE select a winning thread using different filters. This + * involves 2 or 3 exchanges on the PowerBus that the model does not + * support. 
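+ * Instead, the model scans all vCPU thread contexts and signals the
+ * single matching one, treating duplicate matches as an error.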
+ * + * The parameters represent what is sent on the PowerBus + */ +static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv) +{ + XiveNVT nvt; + XiveTCTXMatch match = { .tctx = NULL, .ring = 0 }; + bool found; + + /* NVT cache lookup */ + if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n", + nvt_blk, nvt_idx); + return; + } + + if (!xive_nvt_is_valid(&nvt)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n", + nvt_blk, nvt_idx); + return; + } + + found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore, + priority, logic_serv, &match); + if (found) { + ipb_update(&match.tctx->regs[match.ring], priority); + xive_tctx_notify(match.tctx, match.ring); + return; + } + + /* Record the IPB in the associated NVT structure */ + ipb_update((uint8_t *) &nvt.w4, priority); + xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4); + + /* + * If no matching NVT is dispatched on a HW thread : + * - update the NVT structure if backlog is activated + * - escalate (ESe PQ bits and EAS in w4-5) if escalation is + * activated + */ +} + +/* + * An END trigger can come from an event trigger (IPI or HW) or from + * another chip. We don't model the PowerBus but the END trigger + * message has the same parameters than in the function below. + */ +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, + uint32_t end_idx, uint32_t end_data) +{ + XiveEND end; + uint8_t priority; + uint8_t format; + + /* END cache lookup */ + if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, + end_idx); + return; + } + + if (!xive_end_is_valid(&end)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", + end_blk, end_idx); + return; + } + + if (xive_end_is_enqueue(&end)) { + xive_end_enqueue(&end, end_data); + /* Enqueuing event data modifies the EQ toggle and index */ + xive_router_write_end(xrtr, end_blk, end_idx, &end, 1); + } + + /* + * The W7 format depends on the F bit in W6. It defines the type + * of the notification : + * + * F=0 : single or multiple NVT notification + * F=1 : User level Event-Based Branch (EBB) notification, no + * priority + */ + format = xive_get_field32(END_W6_FORMAT_BIT, end.w6); + priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7); + + /* The END is masked */ + if (format == 0 && priority == 0xff) { + return; + } + + /* + * Check the END ESn (Event State Buffer for notification) for + * even futher coalescing in the Router + */ + if (!xive_end_is_notify(&end)) { + uint8_t pq = xive_get_field32(END_W1_ESn, end.w1); + bool notify = xive_esb_trigger(&pq); + + if (pq != xive_get_field32(END_W1_ESn, end.w1)) { + end.w1 = xive_set_field32(END_W1_ESn, end.w1, pq); + xive_router_write_end(xrtr, end_blk, end_idx, &end, 1); + } + + /* ESn[Q]=1 : end of notification */ + if (!notify) { + return; + } + } + + /* + * Follows IVPE notification + */ + xive_presenter_notify(xrtr, format, + xive_get_field32(END_W6_NVT_BLOCK, end.w6), + xive_get_field32(END_W6_NVT_INDEX, end.w6), + xive_get_field32(END_W7_F0_IGNORE, end.w7), + priority, + xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7)); + + /* TODO: Auto EOI. 
*/ +} + +static void xive_router_notify(XiveNotifier *xn, uint32_t lisn) +{ + XiveRouter *xrtr = XIVE_ROUTER(xn); + uint8_t eas_blk = XIVE_SRCNO_BLOCK(lisn); + uint32_t eas_idx = XIVE_SRCNO_INDEX(lisn); + XiveEAS eas; + + /* EAS cache lookup */ + if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn); + return; + } + + /* + * The IVRE checks the State Bit Cache at this point. We skip the + * SBC lookup because the state bits of the sources are modeled + * internally in QEMU. + */ + + if (!xive_eas_is_valid(&eas)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn); + return; + } + + if (xive_eas_is_masked(&eas)) { + /* Notification completed */ + return; + } + + /* + * The event trigger becomes an END trigger + */ + xive_router_end_notify(xrtr, + xive_get_field64(EAS_END_BLOCK, eas.w), + xive_get_field64(EAS_END_INDEX, eas.w), + xive_get_field64(EAS_END_DATA, eas.w)); +} + +static void xive_router_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass); + + dc->desc = "XIVE Router Engine"; + xnc->notify = xive_router_notify; +} + +static const TypeInfo xive_router_info = { + .name = TYPE_XIVE_ROUTER, + .parent = TYPE_SYS_BUS_DEVICE, + .abstract = true, + .class_size = sizeof(XiveRouterClass), + .class_init = xive_router_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_NOTIFIER }, + { } + } +}; + +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon) +{ + if (!xive_eas_is_valid(eas)) { + return; + } + + monitor_printf(mon, " %08x %s end:%02x/%04x data:%08x\n", + lisn, xive_eas_is_masked(eas) ? "M" : " ", + (uint8_t) xive_get_field64(EAS_END_BLOCK, eas->w), + (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w), + (uint32_t) xive_get_field64(EAS_END_DATA, eas->w)); +} + +/* + * END ESB MMIO loads + */ +static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size) +{ + XiveENDSource *xsrc = XIVE_END_SOURCE(opaque); + uint32_t offset = addr & 0xFFF; + uint8_t end_blk; + uint32_t end_idx; + XiveEND end; + uint32_t end_esmask; + uint8_t pq; + uint64_t ret = -1; + + end_blk = xsrc->block_id; + end_idx = addr >> (xsrc->esb_shift + 1); + + if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, + end_idx); + return -1; + } + + if (!xive_end_is_valid(&end)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", + end_blk, end_idx); + return -1; + } + + end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe; + pq = xive_get_field32(end_esmask, end.w1); + + switch (offset) { + case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF: + ret = xive_esb_eoi(&pq); + + /* Forward the source event notification for routing ?? */ + break; + + case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF: + ret = pq; + break; + + case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF: + case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF: + case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF: + case XIVE_ESB_SET_PQ_11 ... 
XIVE_ESB_SET_PQ_11 + 0x0FF: + ret = xive_esb_set(&pq, (offset >> 8) & 0x3); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n", + offset); + return -1; + } + + if (pq != xive_get_field32(end_esmask, end.w1)) { + end.w1 = xive_set_field32(end_esmask, end.w1, pq); + xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1); + } + + return ret; +} + +/* + * END ESB MMIO stores are invalid + */ +static void xive_end_source_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%" + HWADDR_PRIx"\n", addr); +} + +static const MemoryRegionOps xive_end_source_ops = { + .read = xive_end_source_read, + .write = xive_end_source_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static void xive_end_source_realize(DeviceState *dev, Error **errp) +{ + XiveENDSource *xsrc = XIVE_END_SOURCE(dev); + Object *obj; + Error *local_err = NULL; + + obj = object_property_get_link(OBJECT(dev), "xive", &local_err); + if (!obj) { + error_propagate(errp, local_err); + error_prepend(errp, "required link 'xive' not found: "); + return; + } + + xsrc->xrtr = XIVE_ROUTER(obj); + + if (!xsrc->nr_ends) { + error_setg(errp, "Number of interrupt needs to be greater than 0"); + return; + } + + if (xsrc->esb_shift != XIVE_ESB_4K && + xsrc->esb_shift != XIVE_ESB_64K) { + error_setg(errp, "Invalid ESB shift setting"); + return; + } + + /* + * Each END is assigned an even/odd pair of MMIO pages, the even page + * manages the ESn field while the odd page manages the ESe field. + */ + memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), + &xive_end_source_ops, xsrc, "xive.end", + (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends); +} + +static Property xive_end_source_properties[] = { + DEFINE_PROP_UINT8("block-id", XiveENDSource, block_id, 0), + DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0), + DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xive_end_source_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "XIVE END Source"; + dc->props = xive_end_source_properties; + dc->realize = xive_end_source_realize; +} + +static const TypeInfo xive_end_source_info = { + .name = TYPE_XIVE_END_SOURCE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiveENDSource), + .class_init = xive_end_source_class_init, +}; + +/* + * XIVE Fabric + */ +static const TypeInfo xive_fabric_info = { + .name = TYPE_XIVE_NOTIFIER, + .parent = TYPE_INTERFACE, + .class_size = sizeof(XiveNotifierClass), +}; + +static void xive_register_types(void) +{ + type_register_static(&xive_source_info); + type_register_static(&xive_fabric_info); + type_register_static(&xive_router_info); + type_register_static(&xive_end_source_info); + type_register_static(&xive_tctx_info); +} + +type_init(xive_register_types) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 3648630386..54746a4030 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -18,7 +18,6 @@ #include "qemu/osdep.h" #include "hw/hw.h" #include "hw/pci/pci.h" -#include "net/net.h" #include "net/tap.h" #include "net/checksum.h" #include "sysemu/sysemu.h" @@ -29,6 +28,7 @@ #include "migration/register.h" #include "vmxnet3.h" +#include "vmxnet3_defs.h" #include "vmxnet_debug.h" #include "vmware_utils.h" #include "net_tx_pkt.h" @@ -131,23 
+131,11 @@ typedef struct VMXNET3Class { DeviceRealize parent_dc_realize; } VMXNET3Class; -#define TYPE_VMXNET3 "vmxnet3" -#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3) - #define VMXNET3_DEVICE_CLASS(klass) \ OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3) #define VMXNET3_DEVICE_GET_CLASS(obj) \ OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3) -/* Cyclic ring abstraction */ -typedef struct { - hwaddr pa; - uint32_t size; - uint32_t cell_size; - uint32_t next; - uint8_t gen; -} Vmxnet3Ring; - static inline void vmxnet3_ring_init(PCIDevice *d, Vmxnet3Ring *ring, hwaddr pa, @@ -245,108 +233,6 @@ vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr) descr->rsvd, descr->dtype, descr->ext1, descr->btype); } -/* Device state and helper functions */ -#define VMXNET3_RX_RINGS_PER_QUEUE (2) - -typedef struct { - Vmxnet3Ring tx_ring; - Vmxnet3Ring comp_ring; - - uint8_t intr_idx; - hwaddr tx_stats_pa; - struct UPT1_TxStats txq_stats; -} Vmxnet3TxqDescr; - -typedef struct { - Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE]; - Vmxnet3Ring comp_ring; - uint8_t intr_idx; - hwaddr rx_stats_pa; - struct UPT1_RxStats rxq_stats; -} Vmxnet3RxqDescr; - -typedef struct { - bool is_masked; - bool is_pending; - bool is_asserted; -} Vmxnet3IntState; - -typedef struct { - PCIDevice parent_obj; - NICState *nic; - NICConf conf; - MemoryRegion bar0; - MemoryRegion bar1; - MemoryRegion msix_bar; - - Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES]; - Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES]; - - /* Whether MSI-X support was installed successfully */ - bool msix_used; - hwaddr drv_shmem; - hwaddr temp_shared_guest_driver_memory; - - uint8_t txq_num; - - /* This boolean tells whether RX packet being indicated has to */ - /* be split into head and body chunks from different RX rings */ - bool rx_packets_compound; - - bool rx_vlan_stripping; - bool lro_supported; - - uint8_t rxq_num; - - /* Network MTU */ - uint32_t mtu; - - /* Maximum number of fragments for indicated TX packets */ - uint32_t max_tx_frags; - - /* Maximum number of fragments for indicated RX packets */ - uint16_t max_rx_frags; - - /* Index for events interrupt */ - uint8_t event_int_idx; - - /* Whether automatic interrupts masking enabled */ - bool auto_int_masking; - - bool peer_has_vhdr; - - /* TX packets to QEMU interface */ - struct NetTxPkt *tx_pkt; - uint32_t offload_mode; - uint32_t cso_or_gso_size; - uint16_t tci; - bool needs_vlan; - - struct NetRxPkt *rx_pkt; - - bool tx_sop; - bool skip_current_tx_pkt; - - uint32_t device_active; - uint32_t last_command; - - uint32_t link_status_and_speed; - - Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS]; - - uint32_t temp_mac; /* To store the low part first */ - - MACAddr perm_mac; - uint32_t vlan_table[VMXNET3_VFT_SIZE]; - uint32_t rx_mode; - MACAddr *mcast_list; - uint32_t mcast_list_len; - uint32_t mcast_list_buff_size; /* needed for live migration. 
*/ - - /* Compatibility flags for migration */ - uint32_t compat_flags; -} VMXNET3State; - /* Interrupt management */ /* diff --git a/hw/net/vmxnet3_defs.h b/hw/net/vmxnet3_defs.h new file mode 100644 index 0000000000..6c19d29b12 --- /dev/null +++ b/hw/net/vmxnet3_defs.h @@ -0,0 +1,133 @@ +/* + * QEMU VMWARE VMXNET3 paravirtual NIC + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + */ + +#include "net/net.h" +#include "hw/net/vmxnet3.h" + +#define TYPE_VMXNET3 "vmxnet3" +#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3) + +/* Device state and helper functions */ +#define VMXNET3_RX_RINGS_PER_QUEUE (2) + +/* Cyclic ring abstraction */ +typedef struct { + hwaddr pa; + uint32_t size; + uint32_t cell_size; + uint32_t next; + uint8_t gen; +} Vmxnet3Ring; + +typedef struct { + Vmxnet3Ring tx_ring; + Vmxnet3Ring comp_ring; + + uint8_t intr_idx; + hwaddr tx_stats_pa; + struct UPT1_TxStats txq_stats; +} Vmxnet3TxqDescr; + +typedef struct { + Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE]; + Vmxnet3Ring comp_ring; + uint8_t intr_idx; + hwaddr rx_stats_pa; + struct UPT1_RxStats rxq_stats; +} Vmxnet3RxqDescr; + +typedef struct { + bool is_masked; + bool is_pending; + bool is_asserted; +} Vmxnet3IntState; + +typedef struct { + PCIDevice parent_obj; + NICState *nic; + NICConf conf; + MemoryRegion bar0; + MemoryRegion bar1; + MemoryRegion msix_bar; + + Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES]; + Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES]; + + /* Whether MSI-X support was installed successfully */ + bool msix_used; + hwaddr drv_shmem; + hwaddr temp_shared_guest_driver_memory; + + uint8_t txq_num; + + /* This boolean tells whether RX packet being indicated has to */ + /* be split into head and body chunks from different RX rings */ + bool rx_packets_compound; + + bool rx_vlan_stripping; + bool lro_supported; + + uint8_t rxq_num; + + /* Network MTU */ + uint32_t mtu; + + /* Maximum number of fragments for indicated TX packets */ + uint32_t max_tx_frags; + + /* Maximum number of fragments for indicated RX packets */ + uint16_t max_rx_frags; + + /* Index for events interrupt */ + uint8_t event_int_idx; + + /* Whether automatic interrupts masking enabled */ + bool auto_int_masking; + + bool peer_has_vhdr; + + /* TX packets to QEMU interface */ + struct NetTxPkt *tx_pkt; + uint32_t offload_mode; + uint32_t cso_or_gso_size; + uint16_t tci; + bool needs_vlan; + + struct NetRxPkt *rx_pkt; + + bool tx_sop; + bool skip_current_tx_pkt; + + uint32_t device_active; + uint32_t last_command; + + uint32_t link_status_and_speed; + + Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS]; + + uint32_t temp_mac; /* To store the low part first */ + + MACAddr perm_mac; + uint32_t vlan_table[VMXNET3_VFT_SIZE]; + uint32_t rx_mode; + MACAddr *mcast_list; + uint32_t mcast_list_len; + uint32_t mcast_list_buff_size; /* needed for live migration. 
*/ + + /* Compatibility flags for migration */ + uint32_t compat_flags; +} VMXNET3State; diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index 299de429ec..9766edb445 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -124,6 +124,10 @@ static Property gen_rp_props[] = { res_reserve.mem_pref_32, -1), DEFINE_PROP_SIZE("pref64-reserve", GenPCIERootPort, res_reserve.mem_pref_64, -1), + DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot, + speed, PCIE_LINK_SPEED_16), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, + width, PCIE_LINK_WIDTH_32), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index 97a8e8b6a4..ff6b8323da 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -206,31 +206,39 @@ static const VMStateDescription pci_bridge_dev_vmstate = { } }; -static void pci_bridge_dev_hotplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); if (!shpc_present(pci_hotplug_dev)) { error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCI_BRIDGE_DEV); + "this %s", object_get_typename(OBJECT(hotplug_dev))); return; } - shpc_device_hotplug_cb(hotplug_dev, dev, errp); + shpc_device_plug_cb(hotplug_dev, dev, errp); } -static void pci_bridge_dev_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, - Error **errp) +void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); + + g_assert(shpc_present(pci_hotplug_dev)); + shpc_device_unplug_cb(hotplug_dev, dev, errp); +} + +void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); if (!shpc_present(pci_hotplug_dev)) { error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCI_BRIDGE_DEV); + "this %s", object_get_typename(OBJECT(hotplug_dev))); return; } - shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp); + shpc_device_unplug_request_cb(hotplug_dev, dev, errp); } static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) @@ -251,8 +259,9 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) dc->props = pci_bridge_dev_properties; dc->vmsd = &pci_bridge_dev_vmstate; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - hc->plug = pci_bridge_dev_hotplug_cb; - hc->unplug_request = pci_bridge_dev_hot_unplug_request_cb; + hc->plug = pci_bridge_dev_plug_cb; + hc->unplug = pci_bridge_dev_unplug_cb; + hc->unplug_request = pci_bridge_dev_unplug_request_cb; } static const TypeInfo pci_bridge_dev_info = { diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index 04cf5a6a92..d491b40d04 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -137,33 +137,6 @@ static const VMStateDescription pcie_pci_bridge_dev_vmstate = { } }; -static void pcie_pci_bridge_hotplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); - - if (!shpc_present(pci_hotplug_dev)) { - error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCIE_PCI_BRIDGE_DEV); - return; - } - shpc_device_hotplug_cb(hotplug_dev, 
dev, errp); -} - -static void pcie_pci_bridge_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, - Error **errp) -{ - PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); - - if (!shpc_present(pci_hotplug_dev)) { - error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCIE_PCI_BRIDGE_DEV); - return; - } - shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp); -} - static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -180,8 +153,9 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) dc->props = pcie_pci_bridge_dev_properties; dc->reset = &pcie_pci_bridge_reset; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - hc->plug = pcie_pci_bridge_hotplug_cb; - hc->unplug_request = pcie_pci_bridge_hot_unplug_request_cb; + hc->plug = pci_bridge_dev_plug_cb; + hc->unplug = pci_bridge_dev_unplug_cb; + hc->unplug_request = pci_bridge_dev_unplug_request_cb; } static const TypeInfo pcie_pci_bridge_info = { diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index 45f9e8cd4a..34ad76743c 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -140,6 +140,19 @@ static Property rp_props[] = { DEFINE_PROP_END_OF_LIST() }; +static void rp_instance_post_init(Object *obj) +{ + PCIESlot *s = PCIE_SLOT(obj); + + if (!s->speed) { + s->speed = QEMU_PCI_EXP_LNK_2_5GT; + } + + if (!s->width) { + s->width = QEMU_PCI_EXP_LNK_X1; + } +} + static void rp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -157,6 +170,7 @@ static void rp_class_init(ObjectClass *klass, void *data) static const TypeInfo rp_info = { .name = TYPE_PCIE_ROOT_PORT, .parent = TYPE_PCIE_SLOT, + .instance_post_init = rp_instance_post_init, .class_init = rp_class_init, .abstract = true, .class_size = sizeof(PCIERootPortClass), diff --git a/hw/pci/pci.c b/hw/pci/pci.c index efb5ce196f..d831fa0a36 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1353,6 +1353,10 @@ uint32_t pci_default_read_config(PCIDevice *d, { uint32_t val = 0; + if (pci_is_express_downstream_port(d) && + ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) { + pcie_sync_bridge_lnk(d); + } memcpy(&val, d->config + address, len); return le32_to_cpu(val); } diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index ee9dff2d3a..b9143ac88b 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -241,9 +241,9 @@ void pci_bridge_update_mappings(PCIBridge *br) * while another accesses an unaffected region. 
*/ memory_region_transaction_begin(); pci_bridge_region_del(br, br->windows); + pci_bridge_region_cleanup(br, w); br->windows = pci_bridge_region_init(br); memory_region_transaction_commit(); - pci_bridge_region_cleanup(br, w); } /* default write_config function for PCI-to-PCI bridge */ diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 5eaa935cb5..5f5345dbac 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" #include "hw/pci/pci_host.h" #include "hw/pci/pci_bus.h" #include "trace.h" @@ -50,9 +51,29 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr) return pci_find_device(bus, bus_num, devfn); } +static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit) +{ + if (*limit > PCI_CONFIG_SPACE_SIZE) { + if (!pci_bus_is_express(bus)) { + *limit = PCI_CONFIG_SPACE_SIZE; + return; + } + + if (!pci_bus_is_root(bus)) { + PCIDevice *bridge = pci_bridge_get_device(bus); + pci_adjust_config_limit(pci_get_bus(bridge), limit); + } + } +} + void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len) { + pci_adjust_config_limit(pci_get_bus(pci_dev), &limit); + if (limit <= addr) { + return; + } + assert(len <= 4); /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. @@ -71,6 +92,11 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, { uint32_t ret; + pci_adjust_config_limit(pci_get_bus(pci_dev), &limit); + if (limit <= addr) { + return ~0x0; + } + assert(len <= 4); /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6c91bd44a0..2d3d8a047b 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -27,6 +27,7 @@ #include "hw/pci/msi.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pcie_regs.h" +#include "hw/pci/pcie_port.h" #include "qemu/range.h" //#define DEBUG_PCIE @@ -68,11 +69,12 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) pci_set_long(exp_cap + PCI_EXP_LNKCAP, (port << PCI_EXP_LNKCAP_PN_SHIFT) | PCI_EXP_LNKCAP_ASPMS_0S | - PCI_EXP_LNK_MLW_1 | - PCI_EXP_LNK_LS_25); + QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT)); pci_set_word(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25); + QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT)); if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, @@ -86,6 +88,76 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) pci_set_word(cmask + PCI_EXP_LNKSTA, 0); } +static void pcie_cap_fill_slot_lnk(PCIDevice *dev) +{ + PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + + /* Skip anything that isn't a PCIESlot */ + if (!s) { + return; + } + + /* Clear and fill LNKCAP from what was configured above */ + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + QEMU_PCI_EXP_LNKCAP_MLW(s->width) | + QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); + + /* + * Link bandwidth notification is required for all root ports and + * downstream ports supporting links wider than x1 or multiple link + * speeds. 
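+ * This is why PCI_EXP_LNKCAP_LBNC is set below whenever the configured
+ * width or speed exceeds the x1 / 2.5GT/s defaults.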
+ */ + if (s->width > QEMU_PCI_EXP_LNK_X1 || + s->speed > QEMU_PCI_EXP_LNK_2_5GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_LBNC); + } + + if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) { + /* + * Hot-plug capable downstream ports and downstream ports supporting + * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC + * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which + * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also + * technically implement this, but it's not done here for compatibility. + */ + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_DLLLARC); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + + /* + * Target Link Speed defaults to the highest link speed supported by + * the component. 2.5GT/s devices are permitted to hardwire to zero. + */ + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2, + QEMU_PCI_EXP_LNKCAP_MLS(s->speed) & + PCI_EXP_LNKCTL2_TLS); + } + + /* + * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is + * actually a reference to the highest bit supported in this register. + * We assume the device supports all link speeds. + */ + if (s->speed > QEMU_PCI_EXP_LNK_5GT) { + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_2_5GB | + PCI_EXP_LNKCAP2_SLS_5_0GB | + PCI_EXP_LNKCAP2_SLS_8_0GB); + if (s->speed > QEMU_PCI_EXP_LNK_8GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_16_0GB); + } + } +} + int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port, Error **errp) @@ -107,6 +179,9 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, /* Filling values common with v1 */ pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER2); + /* Fill link speed and width options */ + pcie_cap_fill_slot_lnk(dev); + /* Filling v2 specific values */ pci_set_long(exp_cap + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); @@ -315,9 +390,8 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event) hotplug_event_notify(dev); } -static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev, - DeviceState *dev, - uint8_t **exp_cap, Error **errp) +static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, + uint8_t **exp_cap, Error **errp) { *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA); @@ -331,13 +405,13 @@ static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev, } } -void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { uint8_t *exp_cap; PCIDevice *pci_dev = PCI_DEVICE(dev); - pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); /* Don't send event when device is enabled during qemu machine creation: * it is present on boot, no hotplug event is necessary. 
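The Link Capabilities 2 handling above only fills in the per-speed bits when the slot is configured for more than 5GT/s, because 2.5 and 5GT/s parts are fully described by LNKCAP alone. A short sketch of the supported-speeds vector being built; the one-bit-per-speed layout is assumed to follow the usual pci_regs.h encodings (bit 1 is 2.5GT/s, and so on), with the standard 1..4 speed codes.

    #include <stdint.h>
    #include <stdio.h>

    /* Link speed codes: 1 = 2.5GT/s, 2 = 5GT/s, 3 = 8GT/s, 4 = 16GT/s */
    static uint32_t lnkcap2_supported_speeds(unsigned max_speed)
    {
        uint32_t vec = 0;
        unsigned s;

        /* one bit per supported speed, matching PCI_EXP_LNKCAP2_SLS_2_5GB
         * and friends; "supports everything up to max_speed" is assumed,
         * as in the patch */
        for (s = 1; s <= max_speed; s++) {
            vec |= 1u << s;
        }
        return vec;
    }

    int main(void)
    {
        printf("8GT/s slot:  LNKCAP2 SLS = 0x%02x\n", lnkcap2_supported_speeds(3));
        printf("16GT/s slot: LNKCAP2 SLS = 0x%02x\n", lnkcap2_supported_speeds(4));
        return 0;
    }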
We do send an @@ -345,6 +419,10 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, if (!dev->hotplugged) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); + if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } return; } @@ -355,24 +433,36 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, if (pci_get_function_0(pci_dev)) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); + if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); } } -static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { object_unparent(OBJECT(dev)); } -void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev)); + + hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort); +} + +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { uint8_t *exp_cap; PCIDevice *pci_dev = PCI_DEVICE(dev); PCIBus *bus = pci_get_bus(pci_dev); - pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, @@ -531,6 +621,10 @@ void pcie_cap_slot_write_config(PCIDevice *dev, pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC); } @@ -728,6 +822,45 @@ void pcie_add_capability(PCIDevice *dev, memset(dev->cmask + offset, 0xFF, size); } +/* + * Sync the PCIe Link Status negotiated speed and width of a bridge with the + * downstream device. If downstream device is not present, re-write with the + * Link Capability fields. Limit width and speed to bridge capabilities for + * compatibility. Use config_read to access the downstream device since it + * could be an assigned device with volatile link information. 
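The slot callbacks above are split into a guest-visible unplug_request step and a final unplug step, and the completion path now reaches the destructor through the device's hotplug handler instead of calling object_unparent() directly. A toy reduction of that flow, with simplified stand-in types rather than the real QOM/qdev interfaces:

    #include <stdio.h>

    typedef struct Device Device;

    typedef struct HotplugHandler {
        void (*unplug_request)(struct HotplugHandler *h, Device *dev);
        void (*unplug)(struct HotplugHandler *h, Device *dev);
    } HotplugHandler;

    struct Device {
        const char *name;
        HotplugHandler *parent_handler;  /* stand-in for qdev_get_hotplug_handler() */
    };

    static void slot_unplug_request(HotplugHandler *h, Device *dev)
    {
        (void)h;
        printf("%s: raise attention/presence event, wait for the guest\n", dev->name);
    }

    static void slot_unplug(HotplugHandler *h, Device *dev)
    {
        (void)h;
        printf("%s: guest released it, finalize (object_unparent step)\n", dev->name);
    }

    /* what the guest-triggered completion path now does instead of calling
     * object_unparent() on the device directly */
    static void complete_removal(Device *dev)
    {
        dev->parent_handler->unplug(dev->parent_handler, dev);
    }

    int main(void)
    {
        HotplugHandler slot = { slot_unplug_request, slot_unplug };
        Device nic = { "nic0", &slot };

        slot.unplug_request(&slot, &nic);   /* e.g. monitor device_del */
        complete_removal(&nic);             /* slot power-off observed */
        return 0;
    }

One visible benefit in this series is that machine-specific handlers (such as the sPAPR PHB code further down) can hook the final teardown and run their own cleanup before the object goes away.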
+ */ +void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) +{ + PCIBridge *br = PCI_BRIDGE(bridge_dev); + PCIBus *bus = pci_bridge_get_sec_bus(br); + PCIDevice *target = bus->devices[0]; + uint8_t *exp_cap = bridge_dev->config + bridge_dev->exp.exp_cap; + uint16_t lnksta, lnkcap = pci_get_word(exp_cap + PCI_EXP_LNKCAP); + + if (!target || !target->exp.exp_cap) { + lnksta = lnkcap; + } else { + lnksta = target->config_read(target, + target->exp.exp_cap + PCI_EXP_LNKSTA, + sizeof(lnksta)); + + if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) { + lnksta &= ~PCI_EXP_LNKSTA_NLW; + lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW; + } + + if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) { + lnksta &= ~PCI_EXP_LNKSTA_CLS; + lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS; + } + } + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta & + (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW)); +} + /************************************************************************** * pci express extended capability helper functions */ diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 6432b9ac1f..bc07abc31b 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -154,8 +154,9 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data) HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); dc->props = pcie_slot_props; - hc->plug = pcie_cap_slot_hotplug_cb; - hc->unplug_request = pcie_cap_slot_hot_unplug_request_cb; + hc->plug = pcie_cap_slot_plug_cb; + hc->unplug = pcie_cap_slot_unplug_cb; + hc->unplug_request = pcie_cap_slot_unplug_request_cb; } static const TypeInfo pcie_slot_type_info = { diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 96a43d2f70..45053b39b9 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -238,6 +238,7 @@ static void shpc_invalid_command(SHPCDevice *shpc) static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) { + HotplugHandler *hotplug_ctrl; int devfn; int pci_slot = SHPC_IDX_TO_PCI(slot); for (devfn = PCI_DEVFN(pci_slot, 0); @@ -245,7 +246,9 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) ++devfn) { PCIDevice *affected_dev = shpc->sec_bus->devices[devfn]; if (affected_dev) { - object_unparent(OBJECT(affected_dev)); + hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(affected_dev)); + hotplug_handler_unplug(hotplug_ctrl, DEVICE(affected_dev), + &error_abort); } } } @@ -482,8 +485,8 @@ static const MemoryRegionOps shpc_mmio_ops = { .max_access_size = 4, }, }; -static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot, - SHPCDevice *shpc, Error **errp) +static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, + SHPCDevice *shpc, Error **errp) { int pci_slot = PCI_SLOT(affected_dev->devfn); *slot = SHPC_PCI_TO_IDX(pci_slot); @@ -497,7 +500,7 @@ static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot, } } -void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, +void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { Error *local_err = NULL; @@ -505,7 +508,7 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, SHPCDevice *shpc = pci_hotplug_dev->shpc; int slot; - shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); + shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -540,8 +543,14 @@ void 
shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, shpc_interrupt_update(pci_hotplug_dev); } -void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + object_unparent(OBJECT(dev)); +} + +void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); @@ -550,7 +559,7 @@ void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, uint8_t led; int slot; - shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); + shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index e6747fce28..b20fea0dfc 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -685,7 +685,7 @@ static void ppce500_cpu_reset(void *opaque) } static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, - qemu_irq **irqs) + IrqLines *irqs) { DeviceState *dev; SysBusDevice *s; @@ -705,7 +705,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, k = 0; for (i = 0; i < smp_cpus; i++) { for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { - sysbus_connect_irq(s, k++, irqs[i][j]); + sysbus_connect_irq(s, k++, irqs[i].irq[j]); } } @@ -713,7 +713,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, } static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc, - qemu_irq **irqs, Error **errp) + IrqLines *irqs, Error **errp) { Error *err = NULL; DeviceState *dev; @@ -742,7 +742,7 @@ static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc, static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, MemoryRegion *ccsr, - qemu_irq **irqs) + IrqLines *irqs) { MachineState *machine = MACHINE(pms); const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); @@ -806,15 +806,14 @@ void ppce500_init(MachineState *machine) /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and * 4 respectively */ unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4}; - qemu_irq **irqs; + IrqLines *irqs; DeviceState *dev, *mpicdev; CPUPPCState *firstenv = NULL; MemoryRegion *ccsr_addr_space; SysBusDevice *s; PPCE500CCSRState *ccsr; - irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *)); - irqs[0] = g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB); + irqs = g_new0(IrqLines, smp_cpus); for (i = 0; i < smp_cpus; i++) { PowerPCCPU *cpu; CPUState *cs; @@ -834,10 +833,9 @@ void ppce500_init(MachineState *machine) firstenv = env; } - irqs[i] = irqs[0] + (i * OPENPIC_OUTPUT_NB); input = (qemu_irq *)env->irq_inputs; - irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT]; - irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT]; + irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT]; + irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT]; env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i; env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0; diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 7e45afae7c..bb19eaba36 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -115,7 +115,7 @@ static void ppc_core99_init(MachineState *machine) PowerPCCPU *cpu = NULL; CPUPPCState *env = NULL; char *filename; - qemu_irq **openpic_irqs; + IrqLines *openpic_irqs; int linux_boot, i, j, k; MemoryRegion *ram = g_new(MemoryRegion, 1), 
*bios = g_new(MemoryRegion, 1); hwaddr kernel_base, initrd_base, cmdline_base = 0; @@ -248,41 +248,37 @@ static void ppc_core99_init(MachineState *machine) memory_region_add_subregion(get_system_memory(), 0xf8000000, sysbus_mmio_get_region(s, 0)); - openpic_irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *)); - openpic_irqs[0] = - g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB); + openpic_irqs = g_new0(IrqLines, smp_cpus); for (i = 0; i < smp_cpus; i++) { /* Mac99 IRQ connection between OpenPIC outputs pins * and PowerPC input pins */ switch (PPC_INPUT(env)) { case PPC_FLAGS_INPUT_6xx: - openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB); - openpic_irqs[i][OPENPIC_OUTPUT_INT] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]; - openpic_irqs[i][OPENPIC_OUTPUT_CINT] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]; - openpic_irqs[i][OPENPIC_OUTPUT_MCK] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] = ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP]; /* Not connected ? */ - openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL; + openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL; /* Check this */ - openpic_irqs[i][OPENPIC_OUTPUT_RESET] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] = ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET]; break; #if defined(TARGET_PPC64) case PPC_FLAGS_INPUT_970: - openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB); - openpic_irqs[i][OPENPIC_OUTPUT_INT] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT]; - openpic_irqs[i][OPENPIC_OUTPUT_CINT] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT]; - openpic_irqs[i][OPENPIC_OUTPUT_MCK] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] = ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP]; /* Not connected ? 
*/ - openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL; + openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL; /* Check this */ - openpic_irqs[i][OPENPIC_OUTPUT_RESET] = + openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] = ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET]; break; #endif /* defined(TARGET_PPC64) */ @@ -299,7 +295,7 @@ static void ppc_core99_init(MachineState *machine) k = 0; for (i = 0; i < smp_cpus; i++) { for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { - sysbus_connect_irq(s, k++, openpic_irqs[i][j]); + sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]); } } g_free(openpic_irqs); diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c index 1b0a0a8ba3..f47b15f10e 100644 --- a/hw/ppc/ppc405_boards.c +++ b/hw/ppc/ppc405_boards.c @@ -149,7 +149,7 @@ static void ref405ep_init(MachineState *machine) MemoryRegion *bios; MemoryRegion *sram = g_new(MemoryRegion, 1); ram_addr_t bdloc; - MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories)); + MemoryRegion *ram_memories = g_new(MemoryRegion, 2); hwaddr ram_bases[2], ram_sizes[2]; target_ulong sram_size; long bios_size; @@ -448,7 +448,7 @@ static void taihu_405ep_init(MachineState *machine) qemu_irq *pic; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *bios; - MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories)); + MemoryRegion *ram_memories = g_new(MemoryRegion, 2); MemoryRegion *ram = g_malloc0(sizeof(*ram)); hwaddr ram_bases[2], ram_sizes[2]; long bios_size; diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index 5c58415cf1..e1aadf126d 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -1519,7 +1519,7 @@ CPUPPCState *ppc405cr_init(MemoryRegion *address_space_mem, /* OBP arbitrer */ ppc4xx_opba_init(0xef600600); /* Universal interrupt controller */ - irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB); + irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB); irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]; irqs[PPCUIC_OUTPUT_CINT] = @@ -1877,7 +1877,7 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem, /* Initialize timers */ ppc_booke_timers_init(cpu, sysclk, 0); /* Universal interrupt controller */ - irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB); + irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB); irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]; irqs[PPCUIC_OUTPUT_CINT] = diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index f5720f979e..b8aa55d526 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -169,8 +169,7 @@ static void bamboo_init(MachineState *machine) unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 }; MemoryRegion *address_space_mem = get_system_memory(); MemoryRegion *isa = g_new(MemoryRegion, 1); - MemoryRegion *ram_memories - = g_malloc(PPC440EP_SDRAM_NR_BANKS * sizeof(*ram_memories)); + MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS); hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS]; hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS]; qemu_irq *pic; @@ -200,7 +199,7 @@ static void bamboo_init(MachineState *machine) ppc_dcr_init(env, NULL, NULL); /* interrupt controller */ - irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB); + irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB); irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]; irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]; pic = ppcuic_init(env, irqs, 0x0C0, 0, 1); diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c index 5aac58f36e..4b051c0950 100644 --- a/hw/ppc/sam460ex.c +++ b/hw/ppc/sam460ex.c 
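The board changes above and around this point replace the two-level qemu_irq pointer arrays with a flat array of per-CPU IrqLines structures, plus a handful of mechanical g_malloc to g_new conversions. A GLib-free illustration of the resulting layout; IrqLines is assumed here to be a small struct wrapping OPENPIC_OUTPUT_NB lines, and qemu_irq is treated as an opaque handle.

    #include <stdio.h>
    #include <stdlib.h>

    #define OPENPIC_OUTPUT_NB 5
    typedef void *qemu_irq;               /* opaque handle in the real code */

    /* assumed shape of the helper type used as irqs[cpu].irq[pin] */
    typedef struct IrqLines {
        qemu_irq irq[OPENPIC_OUTPUT_NB];
    } IrqLines;

    int main(void)
    {
        int smp_cpus = 4;
        int dummy;

        /* one contiguous array replaces the pointer array plus single
         * backing block and its per-CPU pointer fixups */
        IrqLines *irqs = calloc(smp_cpus, sizeof(*irqs));

        irqs[2].irq[1] = &dummy;          /* e.g. the CINT line of CPU 2 */
        printf("irqs[2].irq[1] = %p\n", irqs[2].irq[1]);

        free(irqs);
        return 0;
    }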
@@ -430,7 +430,7 @@ static void sam460ex_init(MachineState *machine) ppc4xx_plb_init(env); /* interrupt controllers */ - irqs = g_malloc0(sizeof(*irqs) * PPCUIC_OUTPUT_NB); + irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB); irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]; irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]; uic[0] = ppcuic_init(env, irqs, 0xc0, 0, 1); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 55be0f56cb..19a07c5c9d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -150,7 +150,7 @@ static void pre_2_10_vmstate_unregister_dummy_icp(int i) (void *)(uintptr_t) i); } -static int xics_max_server_number(sPAPRMachineState *spapr) +int spapr_max_server_number(sPAPRMachineState *spapr) { assert(spapr->vsmt); return DIV_ROUND_UP(max_cpus * spapr->vsmt, smp_threads); @@ -889,8 +889,6 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) /* ibm,associativity-lookup-arrays */ buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t); cur_index = int_buf = g_malloc0(buf_len); - - cur_index = int_buf; int_buf[0] = cpu_to_be32(nr_nodes); int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */ cur_index += 2; @@ -1033,7 +1031,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) cpu_to_be32(0), cpu_to_be32(0), cpu_to_be32(0), - cpu_to_be32(nb_numa_nodes ? nb_numa_nodes - 1 : 0), + cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1), }; _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); @@ -1097,15 +1095,18 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) spapr_dt_rtas_tokens(fdt, rtas); } -/* Prepare ibm,arch-vec-5-platform-support, which indicates the MMU features - * that the guest may request and thus the valid values for bytes 24..26 of - * option vector 5: */ -static void spapr_dt_ov5_platform_support(void *fdt, int chosen) +/* + * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU + * and the XIVE features that the guest may request and thus the valid + * values for bytes 23..26 of option vector 5: + */ +static void spapr_dt_ov5_platform_support(sPAPRMachineState *spapr, void *fdt, + int chosen) { PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); char val[2 * 4] = { - 23, 0x00, /* Xive mode, filled in below. */ + 23, spapr->irq->ov5, /* Xive mode. */ 24, 0x00, /* Hash/Radix, filled in below. */ 25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */ 26, 0x40, /* Radix options: GTSE == yes. 
*/ @@ -1113,7 +1114,11 @@ static void spapr_dt_ov5_platform_support(void *fdt, int chosen) if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0, first_ppc_cpu->compat_pvr)) { - /* If we're in a pre POWER9 compat mode then the guest should do hash */ + /* + * If we're in a pre POWER9 compat mode then the guest should + * do hash and use the legacy interrupt mode + */ + val[1] = 0x00; /* XICS */ val[3] = 0x00; /* Hash */ } else if (kvm_enabled()) { if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { @@ -1191,7 +1196,7 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path)); } - spapr_dt_ov5_platform_support(fdt, chosen); + spapr_dt_ov5_platform_support(spapr, fdt, chosen); g_free(stdout_path); g_free(bootlist); @@ -1270,7 +1275,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ - spapr_dt_xics(xics_max_server_number(spapr), fdt, PHANDLE_XICP); + spapr->irq->dt_populate(spapr, spapr_max_server_number(spapr), fdt, + PHANDLE_XICP); ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { @@ -1290,7 +1296,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } QLIST_FOREACH(phb, &spapr->phbs, list) { - ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt, smc->irq->nr_msis); + ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt, + spapr->irq->nr_msis); if (ret < 0) { error_report("couldn't setup PCI devices in fdt"); exit(1); @@ -1620,6 +1627,12 @@ static void spapr_machine_reset(void) qemu_devices_reset(); + /* + * This is fixing some of the default configuration of the XIVE + * devices. To be called after the reset of the machine devices. + */ + spapr_irq_reset(spapr, &error_fatal); + /* DRC reset may cause a device to be unplugged. This will cause troubles * if this device is used by another device (eg, a running vhost backend * will crash QEMU if the DIMM holding the vring goes away). To avoid such @@ -1731,14 +1744,6 @@ static int spapr_post_load(void *opaque, int version_id) return err; } - if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) { - CPUState *cs; - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - icp_resend(ICP(cpu->intc)); - } - } - /* In earlier versions, there was no separate qdev for the PAPR * RTC, so the RTC offset was stored directly in sPAPREnvironment. * So when migrating from those versions, poke the incoming offset @@ -1759,6 +1764,11 @@ static int spapr_post_load(void *opaque, int version_id) } } + err = spapr_irq_post_load(spapr, version_id); + if (err) { + return err; + } + return err; } @@ -2466,15 +2476,10 @@ static void spapr_init_cpus(sPAPRMachineState *spapr) boot_cores_nr = possible_cpus->len; } - /* VSMT must be set in order to be able to compute VCPU ids, ie to - * call xics_max_server_number() or spapr_vcpu_id(). - */ - spapr_set_vsmt_mode(spapr, &error_fatal); - if (smc->pre_2_10_has_unused_icps) { int i; - for (i = 0; i < xics_max_server_number(spapr); i++) { + for (i = 0; i < spapr_max_server_number(spapr); i++) { /* Dummy entries get deregistered when real ICPState objects * are registered during CPU core hotplug. 
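spapr_dt_ov5_platform_support() now takes the byte-23 value from the active interrupt controller backend instead of always advertising the legacy mode. The property payload itself is just a flat list of (option vector 5 byte index, supported values) pairs, as this little dump makes explicit; the 0x00 used for byte 23 below is an assumed placeholder for the legacy XICS case.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t irq_backend_ov5 = 0x00;   /* assumed: legacy XICS encoding */
        uint8_t val[2 * 4] = {
            23, irq_backend_ov5,  /* interrupt controller mode, from spapr->irq->ov5 */
            24, 0x00,             /* hash/radix MMU, filled from CPU/KVM capabilities */
            25, 0x00,             /* hash options: segment tables, GTSE */
            26, 0x40,             /* radix options: GTSE == yes */
        };
        int i;

        for (i = 0; i < (int)sizeof(val); i += 2) {
            printf("OV5 byte %u -> 0x%02x\n", val[i], val[i + 1]);
        }
        return 0;
    }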
*/ @@ -2593,8 +2598,14 @@ static void spapr_machine_init(MachineState *machine) /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; + /* + * VSMT must be set in order to be able to compute VCPU ids, ie to + * call spapr_max_server_number() or spapr_vcpu_id(). + */ + spapr_set_vsmt_mode(spapr, &error_fatal); + /* Set up Interrupt Controller before we create the VCPUs */ - smc->irq->init(spapr, &error_fatal); + spapr_irq_init(spapr, &error_fatal); /* Set up containers for ibm,client-architecture-support negotiated options */ @@ -2621,6 +2632,17 @@ static void spapr_machine_init(MachineState *machine) /* advertise support for ibm,dyamic-memory-v2 */ spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2); + /* advertise XIVE on POWER9 machines */ + if (spapr->irq->ov5 & SPAPR_OV5_XIVE_EXPLOIT) { + if (ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, + 0, spapr->max_compat_pvr)) { + spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT); + } else { + error_report("XIVE-only machines require a POWER9 CPU"); + exit(1); + } + } + /* init CPUs */ spapr_init_cpus(spapr); @@ -3031,9 +3053,38 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name, visit_type_uint32(v, name, (uint32_t *)opaque, errp); } +static char *spapr_get_ic_mode(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->irq == &spapr_irq_xics_legacy) { + return g_strdup("legacy"); + } else if (spapr->irq == &spapr_irq_xics) { + return g_strdup("xics"); + } else if (spapr->irq == &spapr_irq_xive) { + return g_strdup("xive"); + } + g_assert_not_reached(); +} + +static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + /* The legacy IRQ backend can not be set */ + if (strcmp(value, "xics") == 0) { + spapr->irq = &spapr_irq_xics; + } else if (strcmp(value, "xive") == 0) { + spapr->irq = &spapr_irq_xive; + } else { + error_setg(errp, "Bad value for \"ic-mode\" property"); + } +} + static void spapr_instance_init(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); spapr->htab_fd = -1; spapr->use_hotplug_event_source = true; @@ -3067,6 +3118,14 @@ static void spapr_instance_init(Object *obj) " the host's SMT mode", &error_abort); object_property_add_bool(obj, "vfio-no-msix-emulation", spapr_get_msix_emulation, NULL, NULL); + + /* The machine class defines the default interrupt controller mode */ + spapr->irq = smc->irq; + object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, + spapr_set_ic_mode, NULL); + object_property_set_description(obj, "ic-mode", + "Specifies the interrupt controller mode (xics, xive)", + NULL); } static void spapr_machine_finalizefn(Object *obj) @@ -3789,9 +3848,8 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj, Monitor *mon) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - smc->irq->print_info(spapr, mon); + spapr->irq->print_info(spapr, mon); } int spapr_get_vcpu_id(PowerPCCPU *cpu) @@ -3873,7 +3931,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->unplug = spapr_machine_device_unplug; smc->dr_lmb_enabled = true; - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0"); mc->has_hotpluggable_cpus = true; smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED; fwc->get_dev_path = 
spapr_get_fw_dev_path; @@ -3970,6 +4028,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc) { spapr_machine_4_0_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_1); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); } DEFINE_SPAPR_MACHINE(3_1, "3.1", false); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 2398ce62c0..82666436e9 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -11,7 +11,6 @@ #include "hw/ppc/spapr_cpu_core.h" #include "target/ppc/cpu.h" #include "hw/ppc/spapr.h" -#include "hw/ppc/xics.h" /* for icp_create() - to be removed */ #include "hw/boards.h" #include "qapi/error.h" #include "sysemu/cpus.h" @@ -233,8 +232,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, qemu_register_reset(spapr_cpu_reset, cpu); spapr_cpu_reset(cpu); - cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr), - &local_err); + cpu->intc = spapr->irq->cpu_intc_create(spapr, OBJECT(cpu), &local_err); if (local_err) { goto error_unregister; } diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 1b0880ac9e..b56466f89a 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -93,7 +93,7 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn, if (!table) { *fd = -1; - table = g_malloc0(nb_table * sizeof(uint64_t)); + table = g_new0(uint64_t, nb_table); } trace_spapr_iommu_new_table(liobn, table, *fd); diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index e77b94cc68..7b3b5afec2 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -12,6 +12,7 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_xive.h" #include "hw/ppc/xics.h" #include "sysemu/kvm.h" @@ -93,15 +94,9 @@ error: static void spapr_irq_init_xics(sPAPRMachineState *spapr, Error **errp) { MachineState *machine = MACHINE(spapr); - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - int nr_irqs = smc->irq->nr_irqs; + int nr_irqs = spapr->irq->nr_irqs; Error *local_err = NULL; - /* Initialize the MSI IRQ allocator. 
*/ - if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { - spapr_irq_msi_init(spapr, smc->irq->nr_msis); - } - if (kvm_enabled()) { if (machine_kernel_irqchip_allowed(machine) && !xics_kvm_init(spapr, &local_err)) { @@ -195,6 +190,24 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon) ics_pic_print_info(spapr->ics, mon); } +static Object *spapr_irq_cpu_intc_create_xics(sPAPRMachineState *spapr, + Object *cpu, Error **errp) +{ + return icp_create(cpu, spapr->icp_type, XICS_FABRIC(spapr), errp); +} + +static int spapr_irq_post_load_xics(sPAPRMachineState *spapr, int version_id) +{ + if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) { + CPUState *cs; + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + icp_resend(ICP(cpu->intc)); + } + } + return 0; +} + #define SPAPR_IRQ_XICS_NR_IRQS 0x1000 #define SPAPR_IRQ_XICS_NR_MSIS \ (XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI) @@ -202,37 +215,184 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon) sPAPRIrq spapr_irq_xics = { .nr_irqs = SPAPR_IRQ_XICS_NR_IRQS, .nr_msis = SPAPR_IRQ_XICS_NR_MSIS, + .ov5 = SPAPR_OV5_XIVE_LEGACY, .init = spapr_irq_init_xics, .claim = spapr_irq_claim_xics, .free = spapr_irq_free_xics, .qirq = spapr_qirq_xics, .print_info = spapr_irq_print_info_xics, + .dt_populate = spapr_dt_xics, + .cpu_intc_create = spapr_irq_cpu_intc_create_xics, + .post_load = spapr_irq_post_load_xics, +}; + +/* + * XIVE IRQ backend. + */ +static void spapr_irq_init_xive(sPAPRMachineState *spapr, Error **errp) +{ + MachineState *machine = MACHINE(spapr); + uint32_t nr_servers = spapr_max_server_number(spapr); + DeviceState *dev; + int i; + + /* KVM XIVE device not yet available */ + if (kvm_enabled()) { + if (machine_kernel_irqchip_required(machine)) { + error_setg(errp, "kernel_irqchip requested. no KVM XIVE support"); + return; + } + } + + dev = qdev_create(NULL, TYPE_SPAPR_XIVE); + qdev_prop_set_uint32(dev, "nr-irqs", spapr->irq->nr_irqs); + /* + * 8 XIVE END structures per CPU. 
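The XIVE device sizing that follows is simple arithmetic: eight event notification descriptors (ENDs) per interrupt server, one per priority, and one IPI source claimed per server. A quick check with an example server count (the number itself is illustrative):

    #include <stdio.h>

    int main(void)
    {
        unsigned nr_servers = 32;            /* spapr_max_server_number() result */
        unsigned nr_ends = nr_servers << 3;  /* 8 priorities per server */

        printf("nr-ends = %u, IPIs claimed = %u\n", nr_ends, nr_servers);
        return 0;
    }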
One for each available priority + */ + qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); + qdev_init_nofail(dev); + + spapr->xive = SPAPR_XIVE(dev); + + /* Enable the CPU IPIs */ + for (i = 0; i < nr_servers; ++i) { + spapr_xive_irq_claim(spapr->xive, SPAPR_IRQ_IPI + i, false); + } + + spapr_xive_hcall_init(spapr); +} + +static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi, + Error **errp) +{ + if (!spapr_xive_irq_claim(spapr->xive, irq, lsi)) { + error_setg(errp, "IRQ %d is invalid", irq); + return -1; + } + return 0; +} + +static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num) +{ + int i; + + for (i = irq; i < irq + num; ++i) { + spapr_xive_irq_free(spapr->xive, i); + } +} + +static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq) +{ + return spapr_xive_qirq(spapr->xive, irq); +} + +static void spapr_irq_print_info_xive(sPAPRMachineState *spapr, + Monitor *mon) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon); + } + + spapr_xive_pic_print_info(spapr->xive, mon); +} + +static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr, + Object *cpu, Error **errp) +{ + Object *obj = xive_tctx_create(cpu, XIVE_ROUTER(spapr->xive), errp); + + /* + * (TCG) Early setting the OS CAM line for hotplugged CPUs as they + * don't benificiate from the reset of the XIVE IRQ backend + */ + spapr_xive_set_tctx_os_cam(XIVE_TCTX(obj)); + return obj; +} + +static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id) +{ + return 0; +} + +static void spapr_irq_reset_xive(sPAPRMachineState *spapr, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + /* (TCG) Set the OS CAM line of the thread interrupt context. */ + spapr_xive_set_tctx_os_cam(XIVE_TCTX(cpu->intc)); + } +} + +/* + * XIVE uses the full IRQ number space. Set it to 8K to be compatible + * with XICS. + */ + +#define SPAPR_IRQ_XIVE_NR_IRQS 0x2000 +#define SPAPR_IRQ_XIVE_NR_MSIS (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI) + +sPAPRIrq spapr_irq_xive = { + .nr_irqs = SPAPR_IRQ_XIVE_NR_IRQS, + .nr_msis = SPAPR_IRQ_XIVE_NR_MSIS, + .ov5 = SPAPR_OV5_XIVE_EXPLOIT, + + .init = spapr_irq_init_xive, + .claim = spapr_irq_claim_xive, + .free = spapr_irq_free_xive, + .qirq = spapr_qirq_xive, + .print_info = spapr_irq_print_info_xive, + .dt_populate = spapr_dt_xive, + .cpu_intc_create = spapr_irq_cpu_intc_create_xive, + .post_load = spapr_irq_post_load_xive, + .reset = spapr_irq_reset_xive, }; /* * sPAPR IRQ frontend routines for devices */ +void spapr_irq_init(sPAPRMachineState *spapr, Error **errp) +{ + /* Initialize the MSI IRQ allocator. 
*/ + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + spapr_irq_msi_init(spapr, spapr->irq->nr_msis); + } + + spapr->irq->init(spapr, errp); +} int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp) { - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - - return smc->irq->claim(spapr, irq, lsi, errp); + return spapr->irq->claim(spapr, irq, lsi, errp); } void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num) { - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - - smc->irq->free(spapr, irq, num); + spapr->irq->free(spapr, irq, num); } qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq) { - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + return spapr->irq->qirq(spapr, irq); +} + +int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id) +{ + return spapr->irq->post_load(spapr, version_id); +} - return smc->irq->qirq(spapr, irq); +void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp) +{ + if (spapr->irq->reset) { + spapr->irq->reset(spapr, errp); + } } /* @@ -295,10 +455,14 @@ int spapr_irq_find(sPAPRMachineState *spapr, int num, bool align, Error **errp) sPAPRIrq spapr_irq_xics_legacy = { .nr_irqs = SPAPR_IRQ_XICS_LEGACY_NR_IRQS, .nr_msis = SPAPR_IRQ_XICS_LEGACY_NR_IRQS, + .ov5 = SPAPR_OV5_XIVE_LEGACY, .init = spapr_irq_init_xics, .claim = spapr_irq_claim_xics, .free = spapr_irq_free_xics, .qirq = spapr_qirq_xics, .print_info = spapr_irq_print_info_xics, + .dt_populate = spapr_dt_xics, + .cpu_intc_create = spapr_irq_cpu_intc_create_xics, + .post_load = spapr_irq_post_load_xics, }; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 2374d55fc1..bfb02ee96b 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1370,18 +1370,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, /* Callback to be called during DRC release. */ void spapr_phb_remove_pci_device_cb(DeviceState *dev) { - /* some version guests do not wait for completion of a device - * cleanup (generally done asynchronously by the kernel) before - * signaling to QEMU that the device is safe, but instead sleep - * for some 'safe' period of time. unfortunately on a busy host - * this sleep isn't guaranteed to be long enough, resulting in - * bad things like IRQ lines being left asserted during final - * device removal. to deal with this we call reset just prior - * to finalizing the device, which will put the device back into - * an 'idle' state, as the device cleanup code expects. - */ - pci_device_reset(PCI_DEVICE(dev)); - object_unparent(OBJECT(dev)); + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); } static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, @@ -1490,6 +1481,23 @@ out: } } +static void spapr_pci_unplug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + /* some version guests do not wait for completion of a device + * cleanup (generally done asynchronously by the kernel) before + * signaling to QEMU that the device is safe, but instead sleep + * for some 'safe' period of time. unfortunately on a busy host + * this sleep isn't guaranteed to be long enough, resulting in + * bad things like IRQ lines being left asserted during final + * device removal. to deal with this we call reset just prior + * to finalizing the device, which will put the device back into + * an 'idle' state, as the device cleanup code expects. 
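With the frontend helpers above, the machine now keeps a single pointer to one sPAPRIrq ops structure and every call is a thin dispatch through it, with optional hooks such as reset guarded by a NULL check. A toy reduction of that pattern, with assumed simplified types:

    #include <stdio.h>

    typedef struct IrqOps {
        const char *name;
        int  (*claim)(int irq);
        void (*reset)(void);          /* optional, may be NULL */
    } IrqOps;

    static int xics_claim(int irq) { printf("xics: claim %d\n", irq); return 0; }
    static int xive_claim(int irq) { printf("xive: claim %d\n", irq); return 0; }
    static void xive_reset(void)   { printf("xive: reset OS CAM lines\n"); }

    static const IrqOps xics_ops = { "xics", xics_claim, NULL };
    static const IrqOps xive_ops = { "xive", xive_claim, xive_reset };

    /* stand-ins for spapr_irq_claim() / spapr_irq_reset() */
    static int irq_claim(const IrqOps *ops, int irq) { return ops->claim(irq); }
    static void irq_reset(const IrqOps *ops)
    {
        if (ops->reset) {             /* only the XIVE backend needs this hook */
            ops->reset();
        }
    }

    int main(void)
    {
        const IrqOps *irq = &xive_ops;    /* chosen at machine init */

        irq_claim(irq, 0x1000);
        irq_reset(irq);
        irq = &xics_ops;
        irq_reset(irq);                   /* no-op: no reset hook */
        return 0;
    }

On the command line the backend is presumably selected with something like -machine pseries,ic-mode=xive, matching the ic-mode property added in spapr_instance_init() above.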
+ */ + pci_device_reset(PCI_DEVICE(plugged_dev)); + object_unparent(OBJECT(plugged_dev)); +} + static void spapr_pci_unplug_request(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { @@ -1965,6 +1973,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->user_creatable = true; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); hp->plug = spapr_pci_plug; + hp->unplug = spapr_pci_unplug; hp->unplug_request = spapr_pci_unplug_request; } diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c index 329feb148f..cb8a410359 100644 --- a/hw/ppc/spapr_rtas_ddw.c +++ b/hw/ppc/spapr_rtas_ddw.c @@ -96,9 +96,8 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu, uint32_t nret, target_ulong rets) { sPAPRPHBState *sphb; - uint64_t buid, max_window_size; + uint64_t buid; uint32_t avail, addr, pgmask = 0; - MachineState *machine = MACHINE(spapr); if ((nargs != 3) || (nret != 5)) { goto param_error_exit; @@ -114,27 +113,15 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu, /* Translate page mask to LoPAPR format */ pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask); - /* - * This is "Largest contiguous block of TCEs allocated specifically - * for (that is, are reserved for) this PE". - * Return the maximum number as maximum supported RAM size was in 4K pages. - */ - if (machine->ram_size == machine->maxram_size) { - max_window_size = machine->ram_size; - } else { - max_window_size = machine->device_memory->base + - memory_region_size(&machine->device_memory->mr); - } - avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb); rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, avail); - rtas_st(rets, 2, max_window_size >> SPAPR_TCE_PAGE_SHIFT); + rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */ rtas_st(rets, 3, pgmask); rtas_st(rets, 4, 0); /* DMA migration mask, not supported */ - trace_spapr_iommu_ddw_query(buid, addr, avail, max_window_size, pgmask); + trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask); return; param_error_exit: diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 840d4a3c45..7e8a9ad093 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -730,7 +730,7 @@ void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt) } /* Copy out into an array of pointers */ - qdevs = g_malloc(sizeof(qdev) * num); + qdevs = g_new(DeviceState *, num); num = 0; QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { qdevs[num++] = kid->child; diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index ee9b4b4490..5177120574 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -105,7 +105,7 @@ static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size, ppc_dcr_init(env, NULL, NULL); /* interrupt controller */ - irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB); + irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB); irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]; irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]; ppcuic_init(env, irqs, 0x0C0, 0, 1); diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index d7a4bbd91f..c28bfbd44d 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -15,10 +15,18 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "sysemu/sysemu.h" #include "qapi/error.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qmp/qnum.h" +#include "qapi/qapi-events-rdma.h" #include <infiniband/verbs.h> +#include <infiniband/umad_types.h> +#include <infiniband/umad.h> 
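The MAD path added below does not talk to the kernel umad layer directly; it exchanges fixed-size messages with the external rdmacm-mux helper over a chardev, writing one request and blocking for a same-sized status reply (see exec_rdmacm_mux_req() further down). A self-contained sketch of that request/response shape; the toy message layout stands in for the real rdmacm-mux.h definitions.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>

    struct toy_mux_msg {
        int  msg_type;       /* 0 = request, 1 = response */
        int  op_code;        /* e.g. register GID, send MAD */
        int  err_code;       /* 0 = OK */
        char payload[64];
    };

    static int exec_toy_req(int fd, struct toy_mux_msg *msg)
    {
        struct toy_mux_msg resp;

        msg->msg_type = 0;
        if (write(fd, msg, sizeof(*msg)) != (ssize_t)sizeof(*msg)) {
            return -1;
        }
        /* block until the mux answers with a message of the same size */
        if (read(fd, &resp, sizeof(resp)) != (ssize_t)sizeof(resp)) {
            return -1;
        }
        return (resp.msg_type == 1 && resp.err_code == 0) ? 0 : -1;
    }

    int main(void)
    {
        int sv[2];
        struct toy_mux_msg req = { .op_code = 42 };

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
            return 1;
        }
        if (fork() == 0) {               /* child plays the rdmacm-mux side */
            struct toy_mux_msg m;
            ssize_t n = read(sv[1], &m, sizeof(m));
            m.msg_type = 1;
            m.err_code = 0;
            if (n == (ssize_t)sizeof(m)) {
                n = write(sv[1], &m, sizeof(m));
            }
            (void)n;
            _exit(0);
        }
        printf("request status: %d\n", exec_toy_req(sv[0], &req));
        return 0;
    }

The can_receive/read handlers registered alongside it only accept unsolicited messages (incoming MADs) while no synchronous request is in flight, which is what disable_rdmacm_mux_async() and enable_rdmacm_mux_async() toggle around each exchange.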
+#include <rdma/rdma_user_cm.h> +#include "contrib/rdmacm-mux/rdmacm-mux.h" #include "trace.h" #include "rdma_utils.h" #include "rdma_rm.h" @@ -29,27 +37,46 @@ #define VENDOR_ERR_TOO_MANY_SGES 0x202 #define VENDOR_ERR_NOMEM 0x203 #define VENDOR_ERR_QP0 0x204 -#define VENDOR_ERR_NO_SGE 0x205 +#define VENDOR_ERR_INV_NUM_SGE 0x205 #define VENDOR_ERR_MAD_SEND 0x206 #define VENDOR_ERR_INVLKEY 0x207 #define VENDOR_ERR_MR_SMALL 0x208 +#define VENDOR_ERR_INV_MAD_BUFF 0x209 #define THR_NAME_LEN 16 #define THR_POLL_TO 5000 +#define MAD_HDR_SIZE sizeof(struct ibv_grh) + typedef struct BackendCtx { - uint64_t req_id; void *up_ctx; bool is_tx_req; + struct ibv_sge sge; /* Used to save MAD recv buffer */ } BackendCtx; -static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx); +struct backend_umad { + struct ib_user_mad hdr; + char mad[RDMA_MAX_PRIVATE_DATA]; +}; + +static void (*comp_handler)(void *ctx, struct ibv_wc *wc); -static void dummy_comp_handler(int status, unsigned int vendor_err, void *ctx) +static void dummy_comp_handler(void *ctx, struct ibv_wc *wc) { pr_err("No completion handler is registered\n"); } +static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err, + void *ctx) +{ + struct ibv_wc wc = {0}; + + wc.status = status; + wc.vendor_err = vendor_err; + + comp_handler(ctx, &wc); +} + static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) { int i, ne; @@ -74,7 +101,7 @@ static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) } pr_dbg("Processing %s CQE\n", bctx->is_tx_req ? "send" : "recv"); - comp_handler(wc[i].status, wc[i].vendor_err, bctx->up_ctx); + comp_handler(bctx->up_ctx, &wc[i]); rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); g_free(bctx); @@ -146,6 +173,77 @@ static void *comp_handler_thread(void *arg) return NULL; } +static inline void disable_rdmacm_mux_async(RdmaBackendDev *backend_dev) +{ + atomic_set(&backend_dev->rdmacm_mux.can_receive, 0); +} + +static inline void enable_rdmacm_mux_async(RdmaBackendDev *backend_dev) +{ + atomic_set(&backend_dev->rdmacm_mux.can_receive, sizeof(RdmaCmMuxMsg)); +} + +static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev) +{ + return atomic_read(&backend_dev->rdmacm_mux.can_receive); +} + +static int check_mux_op_status(CharBackend *mad_chr_be) +{ + RdmaCmMuxMsg msg = {0}; + int ret; + + pr_dbg("Reading response\n"); + ret = qemu_chr_fe_read_all(mad_chr_be, (uint8_t *)&msg, sizeof(msg)); + if (ret != sizeof(msg)) { + pr_dbg("Invalid message size %d, expecting %ld\n", ret, sizeof(msg)); + return -EIO; + } + + pr_dbg("msg_type=%d\n", msg.hdr.msg_type); + pr_dbg("op_code=%d\n", msg.hdr.op_code); + pr_dbg("err_code=%d\n", msg.hdr.err_code); + + if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_RESP) { + pr_dbg("Invalid message type %d\n", msg.hdr.msg_type); + return -EIO; + } + + if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) { + pr_dbg("Operation failed in mux, error code %d\n", msg.hdr.err_code); + return -EIO; + } + + return 0; +} + +static int exec_rdmacm_mux_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg) +{ + int rc = 0; + + pr_dbg("Executing request %d\n", msg->hdr.op_code); + + msg->hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ; + disable_rdmacm_mux_async(backend_dev); + rc = qemu_chr_fe_write(backend_dev->rdmacm_mux.chr_be, + (const uint8_t *)msg, sizeof(*msg)); + if (rc != sizeof(*msg)) { + enable_rdmacm_mux_async(backend_dev); + pr_dbg("Fail to send request to rdmacm_mux (rc=%d)\n", rc); + return -EIO; + } + + rc = 
check_mux_op_status(backend_dev->rdmacm_mux.chr_be); + if (rc) { + pr_dbg("Fail to execute rdmacm_mux request %d (rc=%d)\n", + msg->hdr.op_code, rc); + } + + enable_rdmacm_mux_async(backend_dev); + + return 0; +} + static void stop_backend_thread(RdmaBackendThread *thread) { thread->run = false; @@ -168,8 +266,8 @@ static void start_comp_thread(RdmaBackendDev *backend_dev) comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); } -void rdma_backend_register_comp_handler(void (*handler)(int status, - unsigned int vendor_err, void *ctx)) +void rdma_backend_register_comp_handler(void (*handler)(void *ctx, + struct ibv_wc *wc)) { comp_handler = handler; } @@ -286,11 +384,73 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, return 0; } +static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx, + union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge) +{ + RdmaCmMuxMsg msg = {0}; + char *hdr, *data; + int ret; + + pr_dbg("num_sge=%d\n", num_sge); + + if (num_sge != 2) { + return -EINVAL; + } + + msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD; + memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid)); + + msg.umad_len = sge[0].length + sge[1].length; + pr_dbg("umad_len=%d\n", msg.umad_len); + + if (msg.umad_len > sizeof(msg.umad.mad)) { + return -ENOMEM; + } + + msg.umad.hdr.addr.qpn = htobe32(1); + msg.umad.hdr.addr.grh_present = 1; + pr_dbg("sgid_idx=%d\n", sgid_idx); + pr_dbg("sgid=0x%llx\n", sgid->global.interface_id); + msg.umad.hdr.addr.gid_index = sgid_idx; + memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid)); + msg.umad.hdr.addr.hop_limit = 0xFF; + + hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length); + if (!hdr) { + pr_dbg("Fail to map to sge[0]\n"); + return -ENOMEM; + } + data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length); + if (!data) { + pr_dbg("Fail to map to sge[1]\n"); + rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length); + return -ENOMEM; + } + + pr_dbg_buf("mad_hdr", hdr, sge[0].length); + pr_dbg_buf("mad_data", data, sge[1].length); + + memcpy(&msg.umad.mad[0], hdr, sge[0].length); + memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length); + + rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length); + rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length); + + ret = exec_rdmacm_mux_req(backend_dev, &msg); + if (ret) { + pr_dbg("Fail to send MAD to rdma_umadmux (%d)\n", ret); + return -EIO; + } + + return 0; +} + void rdma_backend_post_send(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, - union ibv_gid *dgid, uint32_t dqpn, - uint32_t dqkey, void *ctx) + uint8_t sgid_idx, union ibv_gid *sgid, + union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey, + void *ctx) { BackendCtx *bctx; struct ibv_sge new_sge[MAX_SGE]; @@ -301,19 +461,23 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */ if (qp_type == IBV_QPT_SMI) { pr_dbg("QP0 unsupported\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx); } else if (qp_type == IBV_QPT_GSI) { pr_dbg("QP1\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx); + rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge); + if (rc) { + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx); + } else { + complete_work(IBV_WC_SUCCESS, 0, ctx); + } } - pr_dbg("qp->ibqp is NULL for qp_type %d!!!\n", qp_type); return; } pr_dbg("num_sge=%d\n", 
num_sge); - if (!num_sge) { - pr_dbg("num_sge=0\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx); + if (!num_sge || num_sge > MAX_SGE) { + pr_dbg("invalid num_sge=%d\n", num_sge); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx); return; } @@ -324,20 +488,23 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { pr_dbg("Failed to allocate cqe_ctx\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); goto out_free_bctx; } rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge); if (rc) { pr_dbg("Error: Failed to build host SGE array\n"); - comp_handler(IBV_WC_GENERAL_ERR, rc, ctx); + complete_work(IBV_WC_GENERAL_ERR, rc, ctx); goto out_dealloc_cqe_ctx; } if (qp_type == IBV_QPT_UD) { - wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, - backend_dev->backend_gid_idx, dgid); + wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid); + if (!wr.wr.ud.ah) { + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); + goto out_dealloc_cqe_ctx; + } wr.wr.ud.remote_qpn = dqpn; wr.wr.ud.remote_qkey = dqkey; } @@ -353,7 +520,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, if (rc) { pr_dbg("Fail (%d, %d) to post send WQE to qpn %d\n", rc, errno, qp->ibqp->qp_num); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); goto out_dealloc_cqe_ctx; } @@ -366,6 +533,48 @@ out_free_bctx: g_free(bctx); } +static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev, + struct ibv_sge *sge, uint32_t num_sge, + void *ctx) +{ + BackendCtx *bctx; + int rc; + uint32_t bctx_id; + + if (num_sge != 1) { + pr_dbg("Invalid num_sge (%d), expecting 1\n", num_sge); + return VENDOR_ERR_INV_NUM_SGE; + } + + if (sge[0].length < RDMA_MAX_PRIVATE_DATA + sizeof(struct ibv_grh)) { + pr_dbg("Too small buffer for MAD\n"); + return VENDOR_ERR_INV_MAD_BUFF; + } + + pr_dbg("addr=0x%" PRIx64"\n", sge[0].addr); + pr_dbg("length=%d\n", sge[0].length); + pr_dbg("lkey=%d\n", sge[0].lkey); + + bctx = g_malloc0(sizeof(*bctx)); + + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); + if (unlikely(rc)) { + g_free(bctx); + pr_dbg("Fail to allocate cqe_ctx\n"); + return VENDOR_ERR_NOMEM; + } + + pr_dbg("bctx_id %d, bctx %p, ctx %p\n", bctx_id, bctx, ctx); + bctx->up_ctx = ctx; + bctx->sge = *sge; + + qemu_mutex_lock(&backend_dev->recv_mads_list.lock); + qlist_append_int(backend_dev->recv_mads_list.list, bctx_id); + qemu_mutex_unlock(&backend_dev->recv_mads_list.lock); + + return 0; +} + void rdma_backend_post_recv(RdmaBackendDev *backend_dev, RdmaDeviceResources *rdma_dev_res, RdmaBackendQP *qp, uint8_t qp_type, @@ -380,19 +589,22 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */ if (qp_type == IBV_QPT_SMI) { pr_dbg("QP0 unsupported\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx); } if (qp_type == IBV_QPT_GSI) { pr_dbg("QP1\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx); + rc = save_mad_recv_buffer(backend_dev, sge, num_sge, ctx); + if (rc) { + complete_work(IBV_WC_GENERAL_ERR, rc, ctx); + } } return; } pr_dbg("num_sge=%d\n", num_sge); - if (!num_sge) { - pr_dbg("num_sge=0\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, 
ctx); + if (!num_sge || num_sge > MAX_SGE) { + pr_dbg("invalid num_sge=%d\n", num_sge); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx); return; } @@ -403,14 +615,14 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { pr_dbg("Failed to allocate cqe_ctx\n"); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); goto out_free_bctx; } rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge); if (rc) { pr_dbg("Error: Failed to build host SGE array\n"); - comp_handler(IBV_WC_GENERAL_ERR, rc, ctx); + complete_work(IBV_WC_GENERAL_ERR, rc, ctx); goto out_dealloc_cqe_ctx; } @@ -422,7 +634,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, if (rc) { pr_dbg("Fail (%d, %d) to post recv WQE to qpn %d\n", rc, errno, qp->ibqp->qp_num); - comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); goto out_dealloc_cqe_ctx; } @@ -513,7 +725,6 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, switch (qp_type) { case IBV_QPT_GSI: - pr_dbg("QP1 unsupported\n"); return 0; case IBV_QPT_RC: @@ -594,9 +805,9 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, } int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, - uint8_t qp_type, union ibv_gid *dgid, - uint32_t dqpn, uint32_t rq_psn, uint32_t qkey, - bool use_qkey) + uint8_t qp_type, uint8_t sgid_idx, + union ibv_gid *dgid, uint32_t dqpn, + uint32_t rq_psn, uint32_t qkey, bool use_qkey) { struct ibv_qp_attr attr = {0}; union ibv_gid ibv_gid = { @@ -608,13 +819,15 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, attr.qp_state = IBV_QPS_RTR; attr_mask = IBV_QP_STATE; + qp->sgid_idx = sgid_idx; + switch (qp_type) { case IBV_QPT_RC: pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n", be64_to_cpu(ibv_gid.global.subnet_prefix), be64_to_cpu(ibv_gid.global.interface_id)); pr_dbg("dqpn=0x%x\n", dqpn); - pr_dbg("sgid_idx=%d\n", backend_dev->backend_gid_idx); + pr_dbg("sgid_idx=%d\n", qp->sgid_idx); pr_dbg("sport_num=%d\n", backend_dev->port_num); pr_dbg("rq_psn=0x%x\n", rq_psn); @@ -626,7 +839,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = ibv_gid; - attr.ah_attr.grh.sgid_index = backend_dev->backend_gid_idx; + attr.ah_attr.grh.sgid_index = qp->sgid_idx; attr.rq_psn = rq_psn; attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | @@ -635,8 +848,8 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, break; case IBV_QPT_UD: + pr_dbg("qkey=0x%x\n", qkey); if (use_qkey) { - pr_dbg("qkey=0x%x\n", qkey); attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; } @@ -744,23 +957,224 @@ static int init_device_caps(RdmaBackendDev *backend_dev, return 0; } +static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid, + union ibv_gid *my_gid, int paylen) +{ + grh->paylen = htons(paylen); + grh->sgid = *sgid; + grh->dgid = *my_gid; + + pr_dbg("paylen=%d (net=0x%x)\n", paylen, grh->paylen); + pr_dbg("dgid=0x%llx\n", my_gid->global.interface_id); + pr_dbg("sgid=0x%llx\n", sgid->global.interface_id); +} + +static void process_incoming_mad_req(RdmaBackendDev *backend_dev, + RdmaCmMuxMsg *msg) +{ + QObject *o_ctx_id; + unsigned long cqe_ctx_id; + BackendCtx *bctx; + char *mad; + + 
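process_incoming_mad_req(), whose body continues below, pops one of the receive-buffer contexts saved by save_mad_recv_buffer(), maps its single SGE, and lays the MAD out behind a GRH-sized header before completing the work request with IBV_WC_GRH set. A small sketch of just that buffer layout and length check; the 40-byte header mirrors sizeof(struct ibv_grh), and the toy helper is not the QEMU function.

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define MAD_HDR_SIZE 40    /* size of an InfiniBand GRH */

    /* Fill a pre-posted receive buffer: GRH first, MAD payload after it.
     * Returns the payload length (what ends up in wc.byte_len) or -1 if
     * the guest buffer is too small (the VENDOR_ERR_INV_MAD_BUFF case). */
    static int deliver_mad(uint8_t *rx_buf, size_t rx_len,
                           const uint8_t *mad, size_t mad_len)
    {
        if (rx_len < mad_len + MAD_HDR_SIZE) {
            return -1;
        }
        memset(rx_buf, 0, rx_len);
        /* the real code fills paylen/sgid/dgid here via build_mad_hdr() */
        memcpy(rx_buf + MAD_HDR_SIZE, mad, mad_len);
        return (int)mad_len;
    }

    int main(void)
    {
        uint8_t rx[256];
        uint8_t mad[64] = { 0x01 };   /* first byte: MAD base_version */

        printf("payload bytes delivered: %d\n",
               deliver_mad(rx, sizeof(rx), mad, sizeof(mad)));
        return 0;
    }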
pr_dbg("umad_len=%d\n", msg->umad_len); + +#ifdef PVRDMA_DEBUG + struct umad_hdr *hdr = (struct umad_hdr *)&msg->umad.mad; + pr_dbg("bv %x cls %x cv %x mtd %x st %d tid %" PRIx64 " at %x atm %x\n", + hdr->base_version, hdr->mgmt_class, hdr->class_version, + hdr->method, hdr->status, be64toh(hdr->tid), + hdr->attr_id, hdr->attr_mod); +#endif + + qemu_mutex_lock(&backend_dev->recv_mads_list.lock); + o_ctx_id = qlist_pop(backend_dev->recv_mads_list.list); + qemu_mutex_unlock(&backend_dev->recv_mads_list.lock); + if (!o_ctx_id) { + pr_dbg("No more free MADs buffers, waiting for a while\n"); + sleep(THR_POLL_TO); + return; + } + + cqe_ctx_id = qnum_get_uint(qobject_to(QNum, o_ctx_id)); + bctx = rdma_rm_get_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id); + if (unlikely(!bctx)) { + pr_dbg("Error: Fail to find ctx for %ld\n", cqe_ctx_id); + return; + } + + pr_dbg("id %ld, bctx %p, ctx %p\n", cqe_ctx_id, bctx, bctx->up_ctx); + + mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr, + bctx->sge.length); + if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) { + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF, + bctx->up_ctx); + } else { + struct ibv_wc wc = {0}; + pr_dbg_buf("mad", msg->umad.mad, msg->umad_len); + memset(mad, 0, bctx->sge.length); + build_mad_hdr((struct ibv_grh *)mad, + (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid, + msg->umad_len); + memcpy(&mad[MAD_HDR_SIZE], msg->umad.mad, msg->umad_len); + rdma_pci_dma_unmap(backend_dev->dev, mad, bctx->sge.length); + + wc.byte_len = msg->umad_len; + wc.status = IBV_WC_SUCCESS; + wc.wc_flags = IBV_WC_GRH; + comp_handler(bctx->up_ctx, &wc); + } + + g_free(bctx); + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id); +} + +static inline int rdmacm_mux_can_receive(void *opaque) +{ + RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque; + + return rdmacm_mux_can_process_async(backend_dev); +} + +static void rdmacm_mux_read(void *opaque, const uint8_t *buf, int size) +{ + RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque; + RdmaCmMuxMsg *msg = (RdmaCmMuxMsg *)buf; + + pr_dbg("Got %d bytes\n", size); + pr_dbg("msg_type=%d\n", msg->hdr.msg_type); + pr_dbg("op_code=%d\n", msg->hdr.op_code); + + if (msg->hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ && + msg->hdr.op_code != RDMACM_MUX_OP_CODE_MAD) { + pr_dbg("Error: Not a MAD request, skipping\n"); + return; + } + process_incoming_mad_req(backend_dev, msg); +} + +static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be) +{ + int ret; + + backend_dev->rdmacm_mux.chr_be = mad_chr_be; + + ret = qemu_chr_fe_backend_connected(backend_dev->rdmacm_mux.chr_be); + if (!ret) { + pr_dbg("Missing chardev for MAD multiplexer\n"); + return -EIO; + } + + qemu_mutex_init(&backend_dev->recv_mads_list.lock); + backend_dev->recv_mads_list.list = qlist_new(); + + enable_rdmacm_mux_async(backend_dev); + + qemu_chr_fe_set_handlers(backend_dev->rdmacm_mux.chr_be, + rdmacm_mux_can_receive, rdmacm_mux_read, NULL, + NULL, backend_dev, NULL, true); + + return 0; +} + +static void mad_fini(RdmaBackendDev *backend_dev) +{ + pr_dbg("Stopping MAD\n"); + disable_rdmacm_mux_async(backend_dev); + qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be); + qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list)); + qemu_mutex_destroy(&backend_dev->recv_mads_list.lock); +} + +int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, + union ibv_gid *gid) +{ + union ibv_gid sgid; + int ret; + int i = 0; + + pr_dbg("0x%llx, 0x%llx\n", + (long long unsigned 
int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + + do { + ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i, + &sgid); + i++; + } while (!ret && (memcmp(&sgid, gid, sizeof(*gid)))); + + pr_dbg("gid_index=%d\n", i - 1); + + return ret ? ret : i - 1; +} + +int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid) +{ + RdmaCmMuxMsg msg = {0}; + int ret; + + pr_dbg("0x%llx, 0x%llx\n", + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + + msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG; + memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid)); + + ret = exec_rdmacm_mux_req(backend_dev, &msg); + if (ret) { + pr_dbg("Fail to register GID to rdma_umadmux (%d)\n", ret); + return -EIO; + } + + qapi_event_send_rdma_gid_status_changed(ifname, true, + gid->global.subnet_prefix, + gid->global.interface_id); + + return ret; +} + +int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid) +{ + RdmaCmMuxMsg msg = {0}; + int ret; + + pr_dbg("0x%llx, 0x%llx\n", + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + + msg.hdr.op_code = RDMACM_MUX_OP_CODE_UNREG; + memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid)); + + ret = exec_rdmacm_mux_req(backend_dev, &msg); + if (ret) { + pr_dbg("Fail to unregister GID from rdma_umadmux (%d)\n", ret); + return -EIO; + } + + qapi_event_send_rdma_gid_status_changed(ifname, false, + gid->global.subnet_prefix, + gid->global.interface_id); + + return 0; +} + int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, - uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be, Error **errp) { int i; int ret = 0; int num_ibv_devices; struct ibv_device **dev_list; - struct ibv_port_attr port_attr; memset(backend_dev, 0, sizeof(*backend_dev)); backend_dev->dev = pdev; - - backend_dev->backend_gid_idx = backend_gid_idx; backend_dev->port_num = port_num; backend_dev->rdma_dev_res = rdma_dev_res; @@ -797,9 +1211,9 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, backend_dev->ib_dev = *dev_list; } - pr_dbg("Using backend device %s, port %d, gid_idx %d\n", - ibv_get_device_name(backend_dev->ib_dev), - backend_dev->port_num, backend_dev->backend_gid_idx); + pr_dbg("Using backend device %s, port %d\n", + ibv_get_device_name(backend_dev->ib_dev), backend_dev->port_num); + pr_dbg("uverb device %s\n", backend_dev->ib_dev->dev_name); backend_dev->context = ibv_open_device(backend_dev->ib_dev); if (!backend_dev->context) { @@ -816,20 +1230,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, } pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel); - ret = ibv_query_port(backend_dev->context, backend_dev->port_num, - &port_attr); - if (ret) { - error_setg(errp, "Error %d from ibv_query_port", ret); - ret = -EIO; - goto out_destroy_comm_channel; - } - - if (backend_dev->backend_gid_idx >= port_attr.gid_tbl_len) { - error_setg(errp, "Invalid backend_gid_idx, should be less than %d", - port_attr.gid_tbl_len); - goto out_destroy_comm_channel; - } - ret = init_device_caps(backend_dev, dev_attr); if (ret) { error_setg(errp, "Failed to initialize device capabilities"); @@ -837,18 
+1237,13 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, goto out_destroy_comm_channel; } - ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, - backend_dev->backend_gid_idx, &backend_dev->gid); + + ret = mad_init(backend_dev, mad_chr_be); if (ret) { - error_setg(errp, "Failed to query gid %d", - backend_dev->backend_gid_idx); + error_setg(errp, "Fail to initialize mad"); ret = -EIO; goto out_destroy_comm_channel; } - pr_dbg("subnet_prefix=0x%" PRIx64 "\n", - be64_to_cpu(backend_dev->gid.global.subnet_prefix)); - pr_dbg("interface_id=0x%" PRIx64 "\n", - be64_to_cpu(backend_dev->gid.global.interface_id)); backend_dev->comp_thread.run = false; backend_dev->comp_thread.is_running = false; @@ -886,6 +1281,7 @@ void rdma_backend_stop(RdmaBackendDev *backend_dev) void rdma_backend_fini(RdmaBackendDev *backend_dev) { rdma_backend_stop(backend_dev); + mad_fini(backend_dev); g_hash_table_destroy(ah_hash); ibv_destroy_comp_channel(backend_dev->channel); ibv_close_device(backend_dev->context); diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 86e8fe8ab6..8cae40f827 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -17,6 +17,8 @@ #define RDMA_BACKEND_H #include "qapi/error.h" +#include "chardev/char-fe.h" + #include "rdma_rm_defs.h" #include "rdma_backend_defs.h" @@ -26,14 +28,9 @@ enum ibv_special_qp_type { IBV_QPT_GSI = 1, }; -static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev *dev) -{ - return &dev->gid; -} - static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp) { - return qp->ibqp ? qp->ibqp->qp_num : 0; + return qp->ibqp ? qp->ibqp->qp_num : 1; } static inline uint32_t rdma_backend_mr_lkey(const RdmaBackendMR *mr) @@ -49,13 +46,19 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr) int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, - uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be, Error **errp); void rdma_backend_fini(RdmaBackendDev *backend_dev); +int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid); +int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid); +int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, + union ibv_gid *gid); void rdma_backend_start(RdmaBackendDev *backend_dev); void rdma_backend_stop(RdmaBackendDev *backend_dev); -void rdma_backend_register_comp_handler(void (*handler)(int status, - unsigned int vendor_err, void *ctx)); +void rdma_backend_register_comp_handler(void (*handler)(void *ctx, + struct ibv_wc *wc)); void rdma_backend_unregister_comp_handler(void); int rdma_backend_query_port(RdmaBackendDev *backend_dev, @@ -80,9 +83,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, uint32_t qkey); int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, - uint8_t qp_type, union ibv_gid *dgid, - uint32_t dqpn, uint32_t rq_psn, uint32_t qkey, - bool use_qkey); + uint8_t qp_type, uint8_t sgid_idx, + union ibv_gid *dgid, uint32_t dqpn, + uint32_t rq_psn, uint32_t qkey, bool use_qkey); int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, uint32_t sq_psn, uint32_t qkey, bool use_qkey); int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr, @@ -92,6 
+95,7 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp); void rdma_backend_post_send(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, + uint8_t sgid_idx, union ibv_gid *sgid, union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey, void *ctx); void rdma_backend_post_recv(RdmaBackendDev *backend_dev, diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index 7404f64002..1e5c3dd3bf 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -16,8 +16,10 @@ #ifndef RDMA_BACKEND_DEFS_H #define RDMA_BACKEND_DEFS_H -#include <infiniband/verbs.h> #include "qemu/thread.h" +#include "chardev/char-fe.h" +#include <infiniband/verbs.h> +#include "contrib/rdmacm-mux/rdmacm-mux.h" typedef struct RdmaDeviceResources RdmaDeviceResources; @@ -28,17 +30,27 @@ typedef struct RdmaBackendThread { bool is_running; /* Set by the thread to report its status */ } RdmaBackendThread; +typedef struct RecvMadList { + QemuMutex lock; + QList *list; +} RecvMadList; + +typedef struct RdmaCmMux { + CharBackend *chr_be; + int can_receive; +} RdmaCmMux; + typedef struct RdmaBackendDev { struct ibv_device_attr dev_attr; RdmaBackendThread comp_thread; - union ibv_gid gid; PCIDevice *dev; RdmaDeviceResources *rdma_dev_res; struct ibv_device *ib_dev; struct ibv_context *context; struct ibv_comp_channel *channel; uint8_t port_num; - uint8_t backend_gid_idx; + RecvMadList recv_mads_list; + RdmaCmMux rdmacm_mux; } RdmaBackendDev; typedef struct RdmaBackendPD { @@ -58,6 +70,7 @@ typedef struct RdmaBackendCQ { typedef struct RdmaBackendQP { struct ibv_pd *ibpd; struct ibv_qp *ibqp; + uint8_t sgid_idx; } RdmaBackendQP; #endif diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 8d59a42cd1..f5b1295890 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -43,7 +43,7 @@ static inline void res_tbl_free(RdmaRmResTbl *tbl) { qemu_mutex_destroy(&tbl->lock); g_free(tbl->tbl); - bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0); + g_free(tbl->bitmap); } static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle) @@ -263,7 +263,7 @@ int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, } cq->opaque = opaque; - cq->notify = false; + cq->notify = CNT_CLEAR; rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe); if (rc) { @@ -291,7 +291,10 @@ void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle, return; } - cq->notify = notify; + if (cq->notify != CNT_SET) { + cq->notify = notify ? 
CNT_ARM : CNT_CLEAR; + } + pr_dbg("notify=%d\n", cq->notify); } @@ -349,6 +352,11 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, return -EINVAL; } + if (qp_type == IBV_QPT_GSI) { + scq->notify = CNT_SET; + rcq->notify = CNT_SET; + } + qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn); if (!qp) { return -ENOMEM; @@ -383,7 +391,7 @@ out_dealloc_qp: } int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, - uint32_t qp_handle, uint32_t attr_mask, + uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx, union ibv_gid *dgid, uint32_t dqpn, enum ibv_qp_state qp_state, uint32_t qkey, uint32_t rq_psn, uint32_t sq_psn) @@ -392,6 +400,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int ret; pr_dbg("qpn=0x%x\n", qp_handle); + pr_dbg("qkey=0x%x\n", qkey); qp = rdma_rm_get_qp(dev_res, qp_handle); if (!qp) { @@ -422,9 +431,19 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, } if (qp->qp_state == IBV_QPS_RTR) { + /* Get backend gid index */ + pr_dbg("Guest sgid_idx=%d\n", sgid_idx); + sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev, + sgid_idx); + if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */ + pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx); + return -EIO; + } + ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp, - qp->qp_type, dgid, dqpn, rq_psn, - qkey, attr_mask & IBV_QP_QKEY); + qp->qp_type, sgid_idx, dgid, dqpn, + rq_psn, qkey, + attr_mask & IBV_QP_QKEY); if (ret) { return -EIO; } @@ -515,11 +534,93 @@ void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); } +int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, union ibv_gid *gid, int gid_idx) +{ + int rc; + + rc = rdma_backend_add_gid(backend_dev, ifname, gid); + if (rc) { + pr_dbg("Fail to add gid\n"); + return -EINVAL; + } + + memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid)); + + return 0; +} + +int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, int gid_idx) +{ + int rc; + + if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) { + return 0; + } + + rc = rdma_backend_del_gid(backend_dev, ifname, + &dev_res->port.gid_tbl[gid_idx].gid); + if (rc) { + pr_dbg("Fail to delete gid\n"); + return -EINVAL; + } + + memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0, + sizeof(dev_res->port.gid_tbl[gid_idx].gid)); + dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1; + + return 0; +} + +int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, + RdmaBackendDev *backend_dev, int sgid_idx) +{ + if (unlikely(sgid_idx < 0 || sgid_idx > MAX_PORT_GIDS)) { + pr_dbg("Got invalid sgid_idx %d\n", sgid_idx); + return -EINVAL; + } + + if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) { + dev_res->port.gid_tbl[sgid_idx].backend_gid_index = + rdma_backend_get_gid_index(backend_dev, + &dev_res->port.gid_tbl[sgid_idx].gid); + } + + pr_dbg("backend_gid_index=%d\n", + dev_res->port.gid_tbl[sgid_idx].backend_gid_index); + + return dev_res->port.gid_tbl[sgid_idx].backend_gid_index; +} + static void destroy_qp_hash_key(gpointer data) { g_bytes_unref(data); } +static void init_ports(RdmaDeviceResources *dev_res) +{ + int i; + + memset(&dev_res->port, 0, sizeof(dev_res->port)); + + dev_res->port.state = IBV_PORT_DOWN; + for (i = 0; i < MAX_PORT_GIDS; i++) { + 
dev_res->port.gid_tbl[i].backend_gid_index = -1; + } +} + +static void fini_ports(RdmaDeviceResources *dev_res, + RdmaBackendDev *backend_dev, const char *ifname) +{ + int i; + + dev_res->port.state = IBV_PORT_DOWN; + for (i = 0; i < MAX_PORT_GIDS; i++) { + rdma_rm_del_gid(dev_res, backend_dev, ifname, i); + } +} + int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, Error **errp) { @@ -537,11 +638,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, dev_attr->max_qp_wr, sizeof(void *)); res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC)); + init_ports(dev_res); + return 0; } -void rdma_rm_fini(RdmaDeviceResources *dev_res) +void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname) { + fini_ports(dev_res, backend_dev, ifname); + res_tbl_free(&dev_res->uc_tbl); res_tbl_free(&dev_res->cqe_ctx_tbl); res_tbl_free(&dev_res->qp_tbl); diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index b4e04cc7b4..3c602c04c0 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -22,7 +22,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, Error **errp); -void rdma_rm_fini(RdmaDeviceResources *dev_res); +void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname); int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, uint32_t *pd_handle, uint32_t ctx_handle); @@ -55,7 +56,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, uint32_t recv_cq_handle, void *opaque, uint32_t *qpn); RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn); int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, - uint32_t qp_handle, uint32_t attr_mask, + uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx, union ibv_gid *dgid, uint32_t dqpn, enum ibv_qp_state qp_state, uint32_t qkey, uint32_t rq_psn, uint32_t sq_psn); @@ -69,4 +70,16 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id); void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id); +int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, union ibv_gid *gid, int gid_idx); +int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, int gid_idx); +int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, + RdmaBackendDev *backend_dev, int sgid_idx); +static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res, + int sgid_idx) +{ + return &dev_res->port.gid_tbl[sgid_idx].gid; +} + #endif diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 7228151239..0ba61d1838 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -18,8 +18,8 @@ #include "rdma_backend_defs.h" -#define MAX_PORTS 1 -#define MAX_PORT_GIDS 1 +#define MAX_PORTS 1 /* Do not change - we support only one port */ +#define MAX_PORT_GIDS 255 #define MAX_GIDS MAX_PORT_GIDS #define MAX_PORT_PKEYS 1 #define MAX_PKEYS MAX_PORT_PKEYS @@ -49,10 +49,16 @@ typedef struct RdmaRmPD { uint32_t ctx_handle; } RdmaRmPD; +typedef enum CQNotificationType { + CNT_CLEAR, + CNT_ARM, + CNT_SET, +} CQNotificationType; + typedef struct RdmaRmCQ { RdmaBackendCQ backend_cq; void *opaque; - bool notify; + CQNotificationType notify; } RdmaRmCQ; /* MR (DMA region) */ @@ -80,13 +86,18 @@ typedef struct 
RdmaRmQP { enum ibv_qp_state qp_state; } RdmaRmQP; +typedef struct RdmaRmGid { + union ibv_gid gid; + int backend_gid_index; +} RdmaRmGid; + typedef struct RdmaRmPort { - union ibv_gid gid_tbl[MAX_PORT_GIDS]; + RdmaRmGid gid_tbl[MAX_PORT_GIDS]; enum ibv_port_state state; } RdmaRmPort; typedef struct RdmaDeviceResources { - RdmaRmPort ports[MAX_PORTS]; + RdmaRmPort port; RdmaRmResTbl pd_tbl; RdmaRmResTbl mr_tbl; RdmaRmResTbl uc_tbl; diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c index dc23f158f3..4fbea8cde2 100644 --- a/hw/rdma/rdma_utils.c +++ b/hw/rdma/rdma_utils.c @@ -13,6 +13,7 @@ * */ +#include "qemu/osdep.h" #include "rdma_utils.h" #ifdef PVRDMA_DEBUG diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index 04c7c2ef5b..4490ea0b94 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -17,9 +17,9 @@ #ifndef RDMA_UTILS_H #define RDMA_UTILS_H -#include "qemu/osdep.h" #include "hw/pci/pci.h" #include "sysemu/dma.h" +#include "stdio.h" #define pr_info(fmt, ...) \ fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\ @@ -40,12 +40,36 @@ extern unsigned long pr_dbg_cnt; #define pr_dbg(fmt, ...) \ fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \ __func__, __LINE__, ## __VA_ARGS__) + +#define pr_dbg_buf(title, buf, len) \ +{ \ + int i; \ + char *b = g_malloc0(len * 3 + 1); \ + char b1[4]; \ + for (i = 0; i < len; i++) { \ + sprintf(b1, "%.2X ", buf[i] & 0x000000FF); \ + strcat(b, b1); \ + } \ + pr_dbg("%s (%d): %s\n", title, len, b); \ + g_free(b); \ +} + #else #define init_pr_dbg(void) #define pr_dbg(fmt, ...) +#define pr_dbg_buf(title, buf, len) #endif void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen); void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len); +static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr) +{ + memcpy(eui, addr, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + memcpy(eui + 5, addr + 3, 3); + eui[0] ^= 2; +} + #endif diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h index e2d9f93cdf..ffae36986e 100644 --- a/hw/rdma/vmw/pvrdma.h +++ b/hw/rdma/vmw/pvrdma.h @@ -17,8 +17,11 @@ #define PVRDMA_PVRDMA_H #include "qemu/units.h" +#include "qemu/notify.h" #include "hw/pci/pci.h" #include "hw/pci/msix.h" +#include "chardev/char-fe.h" +#include "hw/net/vmxnet3_defs.h" #include "../rdma_backend_defs.h" #include "../rdma_rm_defs.h" @@ -51,7 +54,7 @@ #define PVRDMA_FW_VERSION 14 /* Some defaults */ -#define PVRDMA_PKEY 0x7FFF +#define PVRDMA_PKEY 0xFFFF typedef struct DSRInfo { dma_addr_t dma; @@ -78,11 +81,14 @@ typedef struct PVRDMADev { int interrupt_mask; struct ibv_device_attr dev_attr; uint64_t node_guid; + char *backend_eth_device_name; char *backend_device_name; - uint8_t backend_gid_idx; uint8_t backend_port_num; RdmaBackendDev backend_dev; RdmaDeviceResources rdma_dev_res; + CharBackend mad_chr; + VMXNET3State *func0; + Notifier shutdown_notifier; } PVRDMADev; #define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME) diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 4faeb21631..89920887bf 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -128,6 +128,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_port_attr attrs = {0}; pr_dbg("port=%d\n", cmd->port_num); + if (cmd->port_num > MAX_PORTS) { + return -EINVAL; + } if (rdma_backend_query_port(&dev->backend_dev, (struct ibv_port_attr *)&attrs)) { @@ -135,11 +138,9 @@ static int query_port(PVRDMADev *dev, union 
pvrdma_cmd_req *req, } memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_QUERY_PORT_RESP; - resp->hdr.err = 0; - resp->attrs.state = attrs.state; + resp->attrs.state = dev->func0->device_active ? attrs.state : + PVRDMA_PORT_DOWN; resp->attrs.max_mtu = attrs.max_mtu; resp->attrs.active_mtu = attrs.active_mtu; resp->attrs.phys_state = attrs.phys_state; @@ -159,12 +160,16 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_query_pkey_resp *resp = &rsp->query_pkey_resp; pr_dbg("port=%d\n", cmd->port_num); + if (cmd->port_num > MAX_PORTS) { + return -EINVAL; + } + pr_dbg("index=%d\n", cmd->index); + if (cmd->index > MAX_PKEYS) { + return -EINVAL; + } memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP; - resp->hdr.err = 0; resp->pkey = PVRDMA_PKEY; pr_dbg("pkey=0x%x\n", resp->pkey); @@ -177,17 +182,15 @@ static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_create_pd *cmd = &req->create_pd; struct pvrdma_cmd_create_pd_resp *resp = &rsp->create_pd_resp; + int rc; pr_dbg("context=0x%x\n", cmd->ctx_handle ? cmd->ctx_handle : 0); memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_CREATE_PD_RESP; - resp->hdr.err = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev, - &resp->pd_handle, cmd->ctx_handle); + rc = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev, + &resp->pd_handle, cmd->ctx_handle); - pr_dbg("ret=%d\n", resp->hdr.err); - return resp->hdr.err; + return rc; } static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -209,10 +212,9 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_create_mr_resp *resp = &rsp->create_mr_resp; PCIDevice *pci_dev = PCI_DEVICE(dev); void *host_virt = NULL; + int rc = 0; memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_CREATE_MR_RESP; pr_dbg("pd_handle=%d\n", cmd->pd_handle); pr_dbg("access_flags=0x%x\n", cmd->access_flags); @@ -223,22 +225,18 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req, cmd->length); if (!host_virt) { pr_dbg("Failed to map to pdir\n"); - resp->hdr.err = -EINVAL; - goto out; + return -EINVAL; } } - resp->hdr.err = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle, - cmd->start, cmd->length, host_virt, - cmd->access_flags, &resp->mr_handle, - &resp->lkey, &resp->rkey); - if (host_virt && !resp->hdr.err) { + rc = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle, cmd->start, + cmd->length, host_virt, cmd->access_flags, + &resp->mr_handle, &resp->lkey, &resp->rkey); + if (rc && host_virt) { munmap(host_virt, cmd->length); } -out: - pr_dbg("ret=%d\n", resp->hdr.err); - return resp->hdr.err; + return rc; } static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -261,6 +259,11 @@ static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring, int rc = -EINVAL; char ring_name[MAX_RING_NAME_SZ]; + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { + pr_dbg("invalid nchunks: %d\n", nchunks); + return rc; + } + pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma); dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { @@ -310,34 +313,43 @@ out: return rc; } +static void destroy_cq_ring(PvrdmaRing *ring) +{ + pvrdma_ring_free(ring); + /* ring_state was in slot 1, not 0 so need to jump back */ + rdma_pci_dma_unmap(ring->dev, --ring->ring_state, 
TARGET_PAGE_SIZE); + g_free(ring); +} + static int create_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { struct pvrdma_cmd_create_cq *cmd = &req->create_cq; struct pvrdma_cmd_create_cq_resp *resp = &rsp->create_cq_resp; PvrdmaRing *ring = NULL; + int rc; memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_CREATE_CQ_RESP; resp->cqe = cmd->cqe; - resp->hdr.err = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, - cmd->nchunks, cmd->cqe); - if (resp->hdr.err) { - goto out; + rc = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, cmd->nchunks, + cmd->cqe); + if (rc) { + return rc; } pr_dbg("ring=%p\n", ring); - resp->hdr.err = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev, - cmd->cqe, &resp->cq_handle, ring); + rc = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev, cmd->cqe, + &resp->cq_handle, ring); + if (rc) { + destroy_cq_ring(ring); + } + resp->cqe = cmd->cqe; -out: - pr_dbg("ret=%d\n", resp->hdr.err); - return resp->hdr.err; + return rc; } static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -356,10 +368,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, } ring = (PvrdmaRing *)cq->opaque; - pvrdma_ring_free(ring); - /* ring_state was in slot 1, not 0 so need to jump back */ - rdma_pci_dma_unmap(PCI_DEVICE(dev), --ring->ring_state, TARGET_PAGE_SIZE); - g_free(ring); + destroy_cq_ring(ring); rdma_rm_dealloc_cq(&dev->rdma_dev_res, cmd->cq_handle); @@ -377,6 +386,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, char ring_name[MAX_RING_NAME_SZ]; uint32_t wqe_sz; + if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES + || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) { + pr_dbg("invalid pages: %d, %d\n", spages, rpages); + return rc; + } + pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma); dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { @@ -451,36 +466,49 @@ out: return rc; } +static void destroy_qp_rings(PvrdmaRing *ring) +{ + pr_dbg("sring=%p\n", &ring[0]); + pvrdma_ring_free(&ring[0]); + pr_dbg("rring=%p\n", &ring[1]); + pvrdma_ring_free(&ring[1]); + + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); + g_free(ring); +} + static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { struct pvrdma_cmd_create_qp *cmd = &req->create_qp; struct pvrdma_cmd_create_qp_resp *resp = &rsp->create_qp_resp; PvrdmaRing *rings = NULL; + int rc; memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_CREATE_QP_RESP; pr_dbg("total_chunks=%d\n", cmd->total_chunks); pr_dbg("send_chunks=%d\n", cmd->send_chunks); - resp->hdr.err = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings, - cmd->max_send_wr, cmd->max_send_sge, - cmd->send_chunks, cmd->max_recv_wr, - cmd->max_recv_sge, cmd->total_chunks - - cmd->send_chunks - 1); - if (resp->hdr.err) { - goto out; + rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings, + cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks, + cmd->max_recv_wr, cmd->max_recv_sge, + cmd->total_chunks - cmd->send_chunks - 1); + if (rc) { + return rc; } pr_dbg("rings=%p\n", rings); - resp->hdr.err = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle, - cmd->qp_type, cmd->max_send_wr, - cmd->max_send_sge, cmd->send_cq_handle, - cmd->max_recv_wr, cmd->max_recv_sge, - cmd->recv_cq_handle, rings, &resp->qpn); + rc = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle, cmd->qp_type, + 
cmd->max_send_wr, cmd->max_send_sge, + cmd->send_cq_handle, cmd->max_recv_wr, + cmd->max_recv_sge, cmd->recv_cq_handle, rings, + &resp->qpn); + if (rc) { + destroy_qp_rings(rings); + return rc; + } resp->max_send_wr = cmd->max_send_wr; resp->max_recv_wr = cmd->max_recv_wr; @@ -488,32 +516,31 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, resp->max_recv_sge = cmd->max_recv_sge; resp->max_inline_data = cmd->max_inline_data; -out: - pr_dbg("ret=%d\n", resp->hdr.err); - return resp->hdr.err; + return 0; } static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp; + int rc; pr_dbg("qp_handle=%d\n", cmd->qp_handle); memset(rsp, 0, sizeof(*rsp)); - rsp->hdr.response = cmd->hdr.response; - rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP; - - rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev, - cmd->qp_handle, cmd->attr_mask, - (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid, - cmd->attrs.dest_qp_num, - (enum ibv_qp_state)cmd->attrs.qp_state, - cmd->attrs.qkey, cmd->attrs.rq_psn, - cmd->attrs.sq_psn); - - pr_dbg("ret=%d\n", rsp->hdr.err); - return rsp->hdr.err; + + /* No need to verify sgid_index since it is u8 */ + + rc = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev, + cmd->qp_handle, cmd->attr_mask, + cmd->attrs.ah_attr.grh.sgid_index, + (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid, + cmd->attrs.dest_qp_num, + (enum ibv_qp_state)cmd->attrs.qp_state, + cmd->attrs.qkey, cmd->attrs.rq_psn, + cmd->attrs.sq_psn); + + return rc; } static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -522,21 +549,18 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_query_qp *cmd = &req->query_qp; struct pvrdma_cmd_query_qp_resp *resp = &rsp->query_qp_resp; struct ibv_qp_init_attr init_attr; + int rc; pr_dbg("qp_handle=%d\n", cmd->qp_handle); pr_dbg("attr_mask=0x%x\n", cmd->attr_mask); memset(rsp, 0, sizeof(*rsp)); - rsp->hdr.response = cmd->hdr.response; - rsp->hdr.ack = PVRDMA_CMD_QUERY_QP_RESP; - rsp->hdr.err = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, - cmd->qp_handle, - (struct ibv_qp_attr *)&resp->attrs, - cmd->attr_mask, &init_attr); + rc = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle, + (struct ibv_qp_attr *)&resp->attrs, cmd->attr_mask, + &init_attr); - pr_dbg("ret=%d\n", rsp->hdr.err); - return rsp->hdr.err; + return rc; } static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -555,13 +579,7 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle); ring = (PvrdmaRing *)qp->opaque; - pr_dbg("sring=%p\n", &ring[0]); - pvrdma_ring_free(&ring[0]); - pr_dbg("rring=%p\n", &ring[1]); - pvrdma_ring_free(&ring[1]); - - rdma_pci_dma_unmap(PCI_DEVICE(dev), ring->ring_state, TARGET_PAGE_SIZE); - g_free(ring); + destroy_qp_rings(ring); return 0; } @@ -570,10 +588,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { struct pvrdma_cmd_create_bind *cmd = &req->create_bind; -#ifdef PVRDMA_DEBUG - __be64 *subnet = (__be64 *)&cmd->new_gid[0]; - __be64 *if_id = (__be64 *)&cmd->new_gid[8]; -#endif + int rc; + union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid; pr_dbg("index=%d\n", cmd->index); @@ -582,26 +598,20 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, } pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index, - (long long unsigned int)be64_to_cpu(*subnet), - (long long 
unsigned int)be64_to_cpu(*if_id)); - - /* Driver forces to one port only */ - memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid, - sizeof(cmd->new_gid)); + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); - /* TODO: Since drivers stores node_guid at load_dsr phase then this - * assignment is not relevant, i need to figure out a way how to - * retrieve MAC of our netdev */ - dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id; - pr_dbg("dev->node_guid=0x%llx\n", - (long long unsigned int)be64_to_cpu(dev->node_guid)); + rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev, + dev->backend_eth_device_name, gid, cmd->index); - return 0; + return rc; } static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { + int rc; + struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind; pr_dbg("index=%d\n", cmd->index); @@ -610,10 +620,10 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, return -EINVAL; } - memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0, - sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw)); + rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev, + dev->backend_eth_device_name, cmd->index); - return 0; + return rc; } static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -621,18 +631,14 @@ static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_create_uc *cmd = &req->create_uc; struct pvrdma_cmd_create_uc_resp *resp = &rsp->create_uc_resp; + int rc; pr_dbg("pfn=%d\n", cmd->pfn); memset(resp, 0, sizeof(*resp)); - resp->hdr.response = cmd->hdr.response; - resp->hdr.ack = PVRDMA_CMD_CREATE_UC_RESP; - resp->hdr.err = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn, - &resp->ctx_handle); - - pr_dbg("ret=%d\n", resp->hdr.err); + rc = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn, &resp->ctx_handle); - return 0; + return rc; } static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, @@ -646,30 +652,32 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, return 0; } + struct cmd_handler { uint32_t cmd; + uint32_t ack; int (*exec)(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp); }; static struct cmd_handler cmd_handlers[] = { - {PVRDMA_CMD_QUERY_PORT, query_port}, - {PVRDMA_CMD_QUERY_PKEY, query_pkey}, - {PVRDMA_CMD_CREATE_PD, create_pd}, - {PVRDMA_CMD_DESTROY_PD, destroy_pd}, - {PVRDMA_CMD_CREATE_MR, create_mr}, - {PVRDMA_CMD_DESTROY_MR, destroy_mr}, - {PVRDMA_CMD_CREATE_CQ, create_cq}, - {PVRDMA_CMD_RESIZE_CQ, NULL}, - {PVRDMA_CMD_DESTROY_CQ, destroy_cq}, - {PVRDMA_CMD_CREATE_QP, create_qp}, - {PVRDMA_CMD_MODIFY_QP, modify_qp}, - {PVRDMA_CMD_QUERY_QP, query_qp}, - {PVRDMA_CMD_DESTROY_QP, destroy_qp}, - {PVRDMA_CMD_CREATE_UC, create_uc}, - {PVRDMA_CMD_DESTROY_UC, destroy_uc}, - {PVRDMA_CMD_CREATE_BIND, create_bind}, - {PVRDMA_CMD_DESTROY_BIND, destroy_bind}, + {PVRDMA_CMD_QUERY_PORT, PVRDMA_CMD_QUERY_PORT_RESP, query_port}, + {PVRDMA_CMD_QUERY_PKEY, PVRDMA_CMD_QUERY_PKEY_RESP, query_pkey}, + {PVRDMA_CMD_CREATE_PD, PVRDMA_CMD_CREATE_PD_RESP, create_pd}, + {PVRDMA_CMD_DESTROY_PD, PVRDMA_CMD_DESTROY_PD_RESP_NOOP, destroy_pd}, + {PVRDMA_CMD_CREATE_MR, PVRDMA_CMD_CREATE_MR_RESP, create_mr}, + {PVRDMA_CMD_DESTROY_MR, PVRDMA_CMD_DESTROY_MR_RESP_NOOP, destroy_mr}, + {PVRDMA_CMD_CREATE_CQ, PVRDMA_CMD_CREATE_CQ_RESP, create_cq}, + {PVRDMA_CMD_RESIZE_CQ, PVRDMA_CMD_RESIZE_CQ_RESP, NULL}, + 
{PVRDMA_CMD_DESTROY_CQ, PVRDMA_CMD_DESTROY_CQ_RESP_NOOP, destroy_cq}, + {PVRDMA_CMD_CREATE_QP, PVRDMA_CMD_CREATE_QP_RESP, create_qp}, + {PVRDMA_CMD_MODIFY_QP, PVRDMA_CMD_MODIFY_QP_RESP, modify_qp}, + {PVRDMA_CMD_QUERY_QP, PVRDMA_CMD_QUERY_QP_RESP, query_qp}, + {PVRDMA_CMD_DESTROY_QP, PVRDMA_CMD_DESTROY_QP_RESP, destroy_qp}, + {PVRDMA_CMD_CREATE_UC, PVRDMA_CMD_CREATE_UC_RESP, create_uc}, + {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc}, + {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind}, + {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, }; int execute_command(PVRDMADev *dev) @@ -692,7 +700,12 @@ int execute_command(PVRDMADev *dev) } err = cmd_handlers[dsr_info->req->hdr.cmd].exec(dev, dsr_info->req, - dsr_info->rsp); + dsr_info->rsp); + dsr_info->rsp->hdr.response = dsr_info->req->hdr.response; + dsr_info->rsp->hdr.ack = cmd_handlers[dsr_info->req->hdr.cmd].ack; + dsr_info->rsp->hdr.err = err < 0 ? -err : 0; + pr_dbg("rsp->hdr.err=%d\n", dsr_info->rsp->hdr.err); + out: set_reg_val(dev, PVRDMA_REG_ERR, err); post_interrupt(dev, INTR_VEC_CMD_RING); diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c index 01247fc041..e8e5b502f6 100644 --- a/hw/rdma/vmw/pvrdma_dev_ring.c +++ b/hw/rdma/vmw/pvrdma_dev_ring.c @@ -73,23 +73,16 @@ out: void *pvrdma_ring_next_elem_read(PvrdmaRing *ring) { + int e; unsigned int idx = 0, offset; - /* - pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail, - ring->ring_state->cons_head); - */ - - if (!pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx)) { + e = pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx); + if (e <= 0) { pr_dbg("No more data in ring\n"); return NULL; } offset = idx * ring->elem_sz; - /* - pr_dbg("idx=%d\n", idx); - pr_dbg("offset=%d\n", offset); - */ return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE); } @@ -105,20 +98,20 @@ void pvrdma_ring_read_inc(PvrdmaRing *ring) void *pvrdma_ring_next_elem_write(PvrdmaRing *ring) { - unsigned int idx, offset, tail; + int idx; + unsigned int offset, tail; - /* - pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail, - ring->ring_state->cons_head); - */ - - if (!pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail)) { + idx = pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail); + if (idx <= 0) { pr_dbg("CQ is full\n"); return NULL; } idx = pvrdma_idx(&ring->ring_state->prod_tail, ring->max_elems); - /* TODO: tail == idx */ + if (idx < 0 || tail != idx) { + pr_dbg("invalid idx\n"); + return NULL; + } offset = idx * ring->elem_sz; return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE); diff --git a/hw/rdma/vmw/pvrdma_dev_ring.h b/hw/rdma/vmw/pvrdma_dev_ring.h index 411d244603..5f2a0cf9b9 100644 --- a/hw/rdma/vmw/pvrdma_dev_ring.h +++ b/hw/rdma/vmw/pvrdma_dev_ring.h @@ -16,7 +16,6 @@ #ifndef PVRDMA_DEV_RING_H #define PVRDMA_DEV_RING_H -#include "qemu/typedefs.h" #define MAX_RING_NAME_SZ 32 diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index ca5fa8d981..838ad8a949 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -24,6 +24,7 @@ #include "hw/qdev-properties.h" #include "cpu.h" #include "trace.h" +#include "sysemu/sysemu.h" #include "../rdma_rm.h" #include "../rdma_backend.h" @@ -36,9 +37,9 @@ #include "pvrdma_qp_ops.h" static Property pvrdma_dev_properties[] = { - DEFINE_PROP_STRING("backend-dev", PVRDMADev, backend_device_name), - 
DEFINE_PROP_UINT8("backend-port", PVRDMADev, backend_port_num, 1), - DEFINE_PROP_UINT8("backend-gid-idx", PVRDMADev, backend_gid_idx, 0), + DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name), + DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name), + DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1), DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, MAX_MR_SIZE), DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), @@ -51,6 +52,7 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), + DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), DEFINE_PROP_END_OF_LIST(), }; @@ -263,7 +265,7 @@ static void init_dsr_dev_caps(PVRDMADev *dev) dsr->caps.sys_image_guid = 0; pr_dbg("sys_image_guid=%" PRIx64 "\n", dsr->caps.sys_image_guid); - dsr->caps.node_guid = cpu_to_be64(dev->node_guid); + dsr->caps.node_guid = dev->node_guid; pr_dbg("node_guid=%" PRIx64 "\n", be64_to_cpu(dsr->caps.node_guid)); dsr->caps.phys_port_cnt = MAX_PORTS; @@ -275,17 +277,6 @@ static void init_dsr_dev_caps(PVRDMADev *dev) pr_dbg("Initialized\n"); } -static void init_ports(PVRDMADev *dev, Error **errp) -{ - int i; - - memset(dev->rdma_dev_res.ports, 0, sizeof(dev->rdma_dev_res.ports)); - - for (i = 0; i < MAX_PORTS; i++) { - dev->rdma_dev_res.ports[i].state = IBV_PORT_DOWN; - } -} - static void uninit_msix(PCIDevice *pdev, int used_vectors) { PVRDMADev *dev = PVRDMA_DEV(pdev); @@ -334,7 +325,8 @@ static void pvrdma_fini(PCIDevice *pdev) pvrdma_qp_ops_fini(); - rdma_rm_fini(&dev->rdma_dev_res); + rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, + dev->backend_eth_device_name); rdma_backend_fini(&dev->backend_dev); @@ -343,6 +335,9 @@ static void pvrdma_fini(PCIDevice *pdev) if (msix_enabled(pdev)) { uninit_msix(pdev, RDMA_MAX_INTRS); } + + pr_dbg("Device %s %x.%x is down\n", pdev->name, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); } static void pvrdma_stop(PVRDMADev *dev) @@ -368,13 +363,11 @@ static int unquiesce_device(PVRDMADev *dev) return 0; } -static int reset_device(PVRDMADev *dev) +static void reset_device(PVRDMADev *dev) { pvrdma_stop(dev); pr_dbg("Device reset complete\n"); - - return 0; } static uint64_t regs_read(void *opaque, hwaddr addr, unsigned size) @@ -455,6 +448,11 @@ static const MemoryRegionOps regs_ops = { }, }; +static uint64_t uar_read(void *opaque, hwaddr addr, unsigned size) +{ + return 0xffffffff; +} + static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { PVRDMADev *dev = opaque; @@ -496,6 +494,7 @@ static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) } static const MemoryRegionOps uar_ops = { + .read = uar_read, .write = uar_write, .endianness = DEVICE_LITTLE_ENDIAN, .impl = { @@ -570,12 +569,21 @@ static int pvrdma_check_ram_shared(Object *obj, void *opaque) return 0; } +static void pvrdma_shutdown_notifier(Notifier *n, void *opaque) +{ + PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier); + PCIDevice *pci_dev = PCI_DEVICE(dev); + + pvrdma_fini(pci_dev); +} + static void pvrdma_realize(PCIDevice *pdev, Error **errp) { - int rc; + int rc = 0; PVRDMADev *dev = PVRDMA_DEV(pdev); Object *memdev_root; bool ram_shared = false; + PCIDevice *func0; init_pr_dbg(); @@ -587,6 +595,20 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) return; } + func0 = pci_get_function_0(pdev); + 
/* Break if not vmxnet3 device in slot 0 */ + if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) { + pr_dbg("func0 type is %s\n", + object_get_typename(&func0->qdev.parent_obj)); + error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), + TYPE_VMXNET3); + return; + } + dev->func0 = VMXNET3(func0); + + addrconf_addr_eui48((unsigned char *)&dev->node_guid, + (const char *)&dev->func0->conf.macaddr.a); + memdev_root = object_resolve_path("/objects", NULL); if (memdev_root) { object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared); @@ -613,7 +635,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, dev->backend_device_name, dev->backend_port_num, - dev->backend_gid_idx, &dev->dev_attr, errp); + &dev->dev_attr, &dev->mad_chr, errp); if (rc) { goto out; } @@ -623,15 +645,17 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) goto out; } - init_ports(dev, errp); - rc = pvrdma_qp_ops_init(); if (rc) { goto out; } + dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; + qemu_register_shutdown_notifier(&dev->shutdown_notifier); + out: if (rc) { + pvrdma_fini(pdev); error_append_hint(errp, "Device fail to load\n"); } } diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index c668afd0ed..300471a4c9 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -47,7 +47,7 @@ typedef struct PvrdmaRqWqe { * 3. Interrupt host */ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, - struct pvrdma_cqe *cqe) + struct pvrdma_cqe *cqe, struct ibv_wc *wc) { struct pvrdma_cqe *cqe1; struct pvrdma_cqne *cqne; @@ -66,6 +66,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, pr_dbg("Writing CQE\n"); cqe1 = pvrdma_ring_next_elem_write(ring); if (unlikely(!cqe1)) { + pr_dbg("No CQEs in ring\n"); return -EINVAL; } @@ -73,8 +74,20 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, cqe1->wr_id = cqe->wr_id; cqe1->qp = cqe->qp; cqe1->opcode = cqe->opcode; - cqe1->status = cqe->status; - cqe1->vendor_err = cqe->vendor_err; + cqe1->status = wc->status; + cqe1->byte_len = wc->byte_len; + cqe1->src_qp = wc->src_qp; + cqe1->wc_flags = wc->wc_flags; + cqe1->vendor_err = wc->vendor_err; + + pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id); + pr_dbg("qp=0x%lx\n", cqe1->qp); + pr_dbg("opcode=%d\n", cqe1->opcode); + pr_dbg("status=%d\n", cqe1->status); + pr_dbg("byte_len=%d\n", cqe1->byte_len); + pr_dbg("src_qp=%d\n", cqe1->src_qp); + pr_dbg("wc_flags=%d\n", cqe1->wc_flags); + pr_dbg("vendor_err=%d\n", cqe1->vendor_err); pvrdma_ring_write_inc(ring); @@ -89,26 +102,22 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, pvrdma_ring_write_inc(&dev->dsr_info.cq); pr_dbg("cq->notify=%d\n", cq->notify); - if (cq->notify) { - cq->notify = false; + if (cq->notify != CNT_CLEAR) { + if (cq->notify == CNT_ARM) { + cq->notify = CNT_CLEAR; + } post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q); } return 0; } -static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err, - void *ctx) +static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc) { CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx; - pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle); - pr_dbg("wr_id=%" PRIx64 "\n", comp_ctx->cqe.wr_id); - pr_dbg("status=%d\n", status); - pr_dbg("vendor_err=0x%x\n", vendor_err); - comp_ctx->cqe.status = status; - comp_ctx->cqe.vendor_err = vendor_err; - pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, 
&comp_ctx->cqe); + pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc); + g_free(ctx); } @@ -129,6 +138,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) RdmaRmQP *qp; PvrdmaSqWqe *wqe; PvrdmaRing *ring; + int sgid_idx; + union ibv_gid *sgid; pr_dbg("qp_handle=0x%x\n", qp_handle); @@ -152,10 +163,28 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) comp_ctx->cq_handle = qp->send_cq_handle; comp_ctx->cqe.wr_id = wqe->hdr.wr_id; comp_ctx->cqe.qp = qp_handle; - comp_ctx->cqe.opcode = wqe->hdr.opcode; + comp_ctx->cqe.opcode = IBV_WC_SEND; + + sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index); + if (!sgid) { + pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index); + return -EIO; + } + pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index, + sgid->global.interface_id); + + sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res, + &dev->backend_dev, + wqe->hdr.wr.ud.av.gid_index); + if (sgid_idx <= 0) { + pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", + wqe->hdr.wr.ud.av.gid_index); + return -EIO; + } rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type, (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, + sgid_idx, sgid, (union ibv_gid *)wqe->hdr.wr.ud.av.dgid, wqe->hdr.wr.ud.remote_qpn, wqe->hdr.wr.ud.remote_qkey, comp_ctx); @@ -194,8 +223,9 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) comp_ctx = g_malloc(sizeof(CompHandlerCtx)); comp_ctx->dev = dev; comp_ctx->cq_handle = qp->recv_cq_handle; - comp_ctx->cqe.qp = qp_handle; comp_ctx->cqe.wr_id = wqe->hdr.wr_id; + comp_ctx->cqe.qp = qp_handle; + comp_ctx->cqe.opcode = IBV_WC_RECV; rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res, &qp->backend_qp, qp->qp_type, diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 04ec5cc970..f92b046cd3 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -1290,9 +1290,19 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src) static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) { int i; - - copy_pmcw_to_guest(&dest->pmcw, &src->pmcw); - copy_scsw_to_guest(&dest->scsw, &src->scsw); + /* + * We copy the PMCW and SCSW in and out of local variables to + * avoid taking the address of members of a packed struct. + */ + PMCW src_pmcw, dest_pmcw; + SCSW src_scsw, dest_scsw; + + src_pmcw = src->pmcw; + copy_pmcw_to_guest(&dest_pmcw, &src_pmcw); + dest->pmcw = dest_pmcw; + src_scsw = src->scsw; + copy_scsw_to_guest(&dest_scsw, &src_scsw); + dest->scsw = dest_scsw; dest->mba = cpu_to_be64(src->mba); for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { dest->mda[i] = src->mda[i]; @@ -1339,9 +1349,19 @@ static void copy_scsw_from_guest(SCSW *dest, const SCSW *src) static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src) { int i; - - copy_pmcw_from_guest(&dest->pmcw, &src->pmcw); - copy_scsw_from_guest(&dest->scsw, &src->scsw); + /* + * We copy the PMCW and SCSW in and out of local variables to + * avoid taking the address of members of a packed struct. 
+ */ + PMCW src_pmcw, dest_pmcw; + SCSW src_scsw, dest_scsw; + + src_pmcw = src->pmcw; + copy_pmcw_from_guest(&dest_pmcw, &src_pmcw); + dest->pmcw = dest_pmcw; + src_scsw = src->scsw; + copy_scsw_from_guest(&dest_scsw, &src_scsw); + dest->scsw = dest_scsw; dest->mba = be64_to_cpu(src->mba); for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { dest->mda[i] = src->mda[i]; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index f7458445c0..15759b6514 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -823,8 +823,8 @@ static bool s390_pci_alloc_idx(S390pciState *s, S390PCIBusDevice *pbdev) return true; } -static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { PCIDevice *pdev = NULL; S390PCIBusDevice *pbdev = NULL; @@ -932,8 +932,8 @@ static void s390_pcihost_timer_cb(void *opaque) qdev_unplug(DEVICE(pbdev), NULL); } -static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { PCIDevice *pci_dev = NULL; PCIBus *bus; @@ -1041,8 +1041,8 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data) dc->reset = s390_pcihost_reset; dc->realize = s390_pcihost_realize; - hc->plug = s390_pcihost_hot_plug; - hc->unplug = s390_pcihost_hot_unplug; + hc->plug = s390_pcihost_plug; + hc->unplug = s390_pcihost_unplug; msi_nonbroken = true; } diff --git a/hw/smbios/smbios-stub.c b/hw/smbios/smbios-stub.c index d3a385441a..64e5ba93ec 100644 --- a/hw/smbios/smbios-stub.c +++ b/hw/smbios/smbios-stub.c @@ -23,7 +23,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" void smbios_entry_add(QemuOpts *opts, Error **errp) { diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 04811279a0..818be8a838 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -24,11 +24,10 @@ #include "sysemu/sysemu.h" #include "qemu/uuid.h" #include "sysemu/cpus.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/loader.h" #include "exec/cpu-common.h" #include "smbios_build.h" -#include "hw/smbios/ipmi.h" /* legacy structures and constants for <= 2.0 machines */ struct smbios_header { diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h index 93b360d520..56b5a1e3f3 100644 --- a/hw/smbios/smbios_build.h +++ b/hw/smbios/smbios_build.h @@ -3,6 +3,7 @@ * * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. * Copyright (C) 2013 Red Hat, Inc. 
+ * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC * * Authors: * Alex Williamson <alex.williamson@hp.com> @@ -96,4 +97,7 @@ extern unsigned smbios_table_cnt; smbios_table_cnt++; \ } while (0) +/* IPMI SMBIOS firmware handling */ +void smbios_build_type_38_table(void); + #endif /* QEMU_SMBIOS_BUILD_H */ diff --git a/hw/smbios/smbios_type_38-stub.c b/hw/smbios/smbios_type_38-stub.c index 5b83c9b1f1..14b53d004b 100644 --- a/hw/smbios/smbios_type_38-stub.c +++ b/hw/smbios/smbios_type_38-stub.c @@ -8,7 +8,7 @@ */ #include "qemu/osdep.h" -#include "hw/smbios/ipmi.h" +#include "smbios_build.h" void smbios_build_type_38_table(void) { diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c index 56e8609c00..0c08f282de 100644 --- a/hw/smbios/smbios_type_38.c +++ b/hw/smbios/smbios_type_38.c @@ -9,8 +9,7 @@ #include "qemu/osdep.h" #include "hw/ipmi/ipmi.h" -#include "hw/smbios/ipmi.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "qemu/error-report.h" #include "smbios_build.h" diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 0a25f5e096..6166ccd47a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -10,9 +10,9 @@ * directory. */ +#include "qemu/osdep.h" #include <linux/vfio.h> #include <sys/ioctl.h> -#include "qemu/osdep.h" #include "qapi/error.h" #include "hw/sysbus.h" #include "hw/vfio/vfio.h" diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5c7bd96984..c0cb1ec289 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1897,15 +1897,10 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, PCI_EXP_TYPE_ENDPOINT << 4, PCI_EXP_FLAGS_TYPE); vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0); + QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0); vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0); } - - /* Mark the Link Status bits as emulated to allow virtual negotiation */ - vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA, - pci_get_word(vdev->pdev.config + pos + - PCI_EXP_LNKSTA), - PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); } /* diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c index bf64996e48..8cc3fa3ef7 100644 --- a/hw/virtio/virtio-crypto-pci.c +++ b/hw/virtio/virtio-crypto-pci.c @@ -64,9 +64,8 @@ static void virtio_crypto_initfn(Object *obj) TYPE_VIRTIO_CRYPTO); } -static const TypeInfo virtio_crypto_pci_info = { - .name = TYPE_VIRTIO_CRYPTO_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = { + .generic_name = TYPE_VIRTIO_CRYPTO_PCI, .instance_size = sizeof(VirtIOCryptoPCI), .instance_init = virtio_crypto_initfn, .class_init = virtio_crypto_pci_class_init, @@ -74,6 +73,6 @@ static const TypeInfo virtio_crypto_pci_info = { static void virtio_crypto_pci_register_types(void) { - type_register_static(&virtio_crypto_pci_info); + virtio_pci_types_register(&virtio_crypto_pci_info); } type_init(virtio_crypto_pci_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index a954799267..d05066deb8 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1119,9 +1119,11 @@ static void virtio_9p_pci_instance_init(Object *obj) TYPE_VIRTIO_9P); } -static const TypeInfo virtio_9p_pci_info = { - .name = TYPE_VIRTIO_9P_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { + .base_name = TYPE_VIRTIO_9P_PCI, + .generic_name = "virtio-9p-pci", + .transitional_name = "virtio-9p-pci-transitional", + 
.non_transitional_name = "virtio-9p-pci-non-transitional", .instance_size = sizeof(V9fsPCIState), .instance_init = virtio_9p_pci_instance_init, .class_init = virtio_9p_pci_class_init, @@ -1877,9 +1879,6 @@ static void virtio_pci_reset(DeviceState *qdev) static Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, - ON_OFF_AUTO_AUTO), - DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, @@ -1939,13 +1938,123 @@ static const TypeInfo virtio_pci_info = { .class_init = virtio_pci_class_init, .class_size = sizeof(VirtioPCIClass), .abstract = true, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { } - }, }; +static Property virtio_pci_generic_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_pci_base_class_init(ObjectClass *klass, void *data) +{ + const VirtioPCIDeviceTypeInfo *t = data; + if (t->class_init) { + t->class_init(klass, NULL); + } +} + +static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = virtio_pci_generic_properties; +} + +/* Used when the generic type and the base type is the same */ +static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) +{ + virtio_pci_base_class_init(klass, data); + virtio_pci_generic_class_init(klass, NULL); +} + +static void virtio_pci_transitional_instance_init(Object *obj) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); + + proxy->disable_legacy = ON_OFF_AUTO_OFF; + proxy->disable_modern = false; +} + +static void virtio_pci_non_transitional_instance_init(Object *obj) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); + + proxy->disable_legacy = ON_OFF_AUTO_ON; + proxy->disable_modern = false; +} + +void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) +{ + TypeInfo base_type_info = { + .name = t->base_name, + .parent = t->parent ? 
t->parent : TYPE_VIRTIO_PCI, + .instance_size = t->instance_size, + .instance_init = t->instance_init, + .class_init = virtio_pci_base_class_init, + .class_data = (void *)t, + .abstract = true, + }; + TypeInfo generic_type_info = { + .name = t->generic_name, + .parent = base_type_info.name, + .class_init = virtio_pci_generic_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, + }; + + if (!base_type_info.name) { + /* No base type -> register a single generic device type */ + base_type_info.name = t->generic_name; + base_type_info.class_init = virtio_pci_generic_base_class_init; + base_type_info.interfaces = generic_type_info.interfaces; + base_type_info.abstract = false; + generic_type_info.name = NULL; + assert(!t->non_transitional_name); + assert(!t->transitional_name); + } + + type_register(&base_type_info); + if (generic_type_info.name) { + type_register(&generic_type_info); + } + + if (t->non_transitional_name) { + const TypeInfo non_transitional_type_info = { + .name = t->non_transitional_name, + .parent = base_type_info.name, + .instance_init = virtio_pci_non_transitional_instance_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, + }; + type_register(&non_transitional_type_info); + } + + if (t->transitional_name) { + const TypeInfo transitional_type_info = { + .name = t->transitional_name, + .parent = base_type_info.name, + .instance_init = virtio_pci_transitional_instance_init, + .interfaces = (InterfaceInfo[]) { + /* + * Transitional virtio devices work only as Conventional PCI + * devices because they require PIO ports. + */ + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, + }; + type_register(&transitional_type_info); + } +} + /* virtio-blk-pci */ static Property virtio_blk_pci_properties[] = { @@ -1995,9 +2104,11 @@ static void virtio_blk_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo virtio_blk_pci_info = { - .name = TYPE_VIRTIO_BLK_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { + .base_name = TYPE_VIRTIO_BLK_PCI, + .generic_name = "virtio-blk-pci", + .transitional_name = "virtio-blk-pci-transitional", + .non_transitional_name = "virtio-blk-pci-non-transitional", .instance_size = sizeof(VirtIOBlkPCI), .instance_init = virtio_blk_pci_instance_init, .class_init = virtio_blk_pci_class_init, @@ -2051,9 +2162,11 @@ static void vhost_user_blk_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo vhost_user_blk_pci_info = { - .name = TYPE_VHOST_USER_BLK_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { + .base_name = TYPE_VHOST_USER_BLK_PCI, + .generic_name = "vhost-user-blk-pci", + .transitional_name = "vhost-user-blk-pci-transitional", + .non_transitional_name = "vhost-user-blk-pci-non-transitional", .instance_size = sizeof(VHostUserBlkPCI), .instance_init = vhost_user_blk_pci_instance_init, .class_init = vhost_user_blk_pci_class_init, @@ -2119,9 +2232,11 @@ static void virtio_scsi_pci_instance_init(Object *obj) TYPE_VIRTIO_SCSI); } -static const TypeInfo virtio_scsi_pci_info = { - .name = TYPE_VIRTIO_SCSI_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { + .base_name = TYPE_VIRTIO_SCSI_PCI, + .generic_name = "virtio-scsi-pci", + .transitional_name = "virtio-scsi-pci-transitional", + .non_transitional_name = 
"virtio-scsi-pci-non-transitional", .instance_size = sizeof(VirtIOSCSIPCI), .instance_init = virtio_scsi_pci_instance_init, .class_init = virtio_scsi_pci_class_init, @@ -2174,9 +2289,11 @@ static void vhost_scsi_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo vhost_scsi_pci_info = { - .name = TYPE_VHOST_SCSI_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { + .base_name = TYPE_VHOST_SCSI_PCI, + .generic_name = "vhost-scsi-pci", + .transitional_name = "vhost-scsi-pci-transitional", + .non_transitional_name = "vhost-scsi-pci-non-transitional", .instance_size = sizeof(VHostSCSIPCI), .instance_init = vhost_scsi_pci_instance_init, .class_init = vhost_scsi_pci_class_init, @@ -2229,9 +2346,11 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo vhost_user_scsi_pci_info = { - .name = TYPE_VHOST_USER_SCSI_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { + .base_name = TYPE_VHOST_USER_SCSI_PCI, + .generic_name = "vhost-user-scsi-pci", + .transitional_name = "vhost-user-scsi-pci-transitional", + .non_transitional_name = "vhost-user-scsi-pci-non-transitional", .instance_size = sizeof(VHostUserSCSIPCI), .instance_init = vhost_user_scsi_pci_instance_init, .class_init = vhost_user_scsi_pci_class_init, @@ -2277,9 +2396,11 @@ static void vhost_vsock_pci_instance_init(Object *obj) TYPE_VHOST_VSOCK); } -static const TypeInfo vhost_vsock_pci_info = { - .name = TYPE_VHOST_VSOCK_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { + .base_name = TYPE_VHOST_VSOCK_PCI, + .generic_name = "vhost-vsock-pci", + .transitional_name = "vhost-vsock-pci-transitional", + .non_transitional_name = "vhost-vsock-pci-non-transitional", .instance_size = sizeof(VHostVSockPCI), .instance_init = vhost_vsock_pci_instance_init, .class_init = vhost_vsock_pci_class_init, @@ -2334,9 +2455,11 @@ static void virtio_balloon_pci_instance_init(Object *obj) "guest-stats-polling-interval", &error_abort); } -static const TypeInfo virtio_balloon_pci_info = { - .name = TYPE_VIRTIO_BALLOON_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { + .base_name = TYPE_VIRTIO_BALLOON_PCI, + .generic_name = "virtio-balloon-pci", + .transitional_name = "virtio-balloon-pci-transitional", + .non_transitional_name = "virtio-balloon-pci-non-transitional", .instance_size = sizeof(VirtIOBalloonPCI), .instance_init = virtio_balloon_pci_instance_init, .class_init = virtio_balloon_pci_class_init, @@ -2407,9 +2530,11 @@ static void virtio_serial_pci_instance_init(Object *obj) TYPE_VIRTIO_SERIAL); } -static const TypeInfo virtio_serial_pci_info = { - .name = TYPE_VIRTIO_SERIAL_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { + .base_name = TYPE_VIRTIO_SERIAL_PCI, + .generic_name = "virtio-serial-pci", + .transitional_name = "virtio-serial-pci-transitional", + .non_transitional_name = "virtio-serial-pci-non-transitional", .instance_size = sizeof(VirtIOSerialPCI), .instance_init = virtio_serial_pci_instance_init, .class_init = virtio_serial_pci_class_init, @@ -2462,9 +2587,11 @@ static void virtio_net_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo virtio_net_pci_info = { - .name = TYPE_VIRTIO_NET_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { + .base_name 
= TYPE_VIRTIO_NET_PCI, + .generic_name = "virtio-net-pci", + .transitional_name = "virtio-net-pci-transitional", + .non_transitional_name = "virtio-net-pci-non-transitional", .instance_size = sizeof(VirtIONetPCI), .instance_init = virtio_net_pci_instance_init, .class_init = virtio_net_pci_class_init, @@ -2513,9 +2640,11 @@ static void virtio_rng_initfn(Object *obj) TYPE_VIRTIO_RNG); } -static const TypeInfo virtio_rng_pci_info = { - .name = TYPE_VIRTIO_RNG_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { + .base_name = TYPE_VIRTIO_RNG_PCI, + .generic_name = "virtio-rng-pci", + .transitional_name = "virtio-rng-pci-transitional", + .non_transitional_name = "virtio-rng-pci-non-transitional", .instance_size = sizeof(VirtIORngPCI), .instance_init = virtio_rng_initfn, .class_init = virtio_rng_pci_class_init, @@ -2605,24 +2734,24 @@ static const TypeInfo virtio_input_hid_pci_info = { .abstract = true, }; -static const TypeInfo virtio_keyboard_pci_info = { - .name = TYPE_VIRTIO_KEYBOARD_PCI, +static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = { + .generic_name = TYPE_VIRTIO_KEYBOARD_PCI, .parent = TYPE_VIRTIO_INPUT_HID_PCI, .class_init = virtio_input_hid_kbd_pci_class_init, .instance_size = sizeof(VirtIOInputHIDPCI), .instance_init = virtio_keyboard_initfn, }; -static const TypeInfo virtio_mouse_pci_info = { - .name = TYPE_VIRTIO_MOUSE_PCI, +static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = { + .generic_name = TYPE_VIRTIO_MOUSE_PCI, .parent = TYPE_VIRTIO_INPUT_HID_PCI, .class_init = virtio_input_hid_mouse_pci_class_init, .instance_size = sizeof(VirtIOInputHIDPCI), .instance_init = virtio_mouse_initfn, }; -static const TypeInfo virtio_tablet_pci_info = { - .name = TYPE_VIRTIO_TABLET_PCI, +static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = { + .generic_name = TYPE_VIRTIO_TABLET_PCI, .parent = TYPE_VIRTIO_INPUT_HID_PCI, .instance_size = sizeof(VirtIOInputHIDPCI), .instance_init = virtio_tablet_initfn, @@ -2637,8 +2766,11 @@ static void virtio_host_initfn(Object *obj) TYPE_VIRTIO_INPUT_HOST); } -static const TypeInfo virtio_host_pci_info = { - .name = TYPE_VIRTIO_INPUT_HOST_PCI, +static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { + .base_name = TYPE_VIRTIO_INPUT_HOST_PCI, + .generic_name = "virtio-input-host-pci", + .transitional_name = "virtio-input-host-pci-transitional", + .non_transitional_name = "virtio-input-host-pci-non-transitional", .parent = TYPE_VIRTIO_INPUT_PCI, .instance_size = sizeof(VirtIOInputHostPCI), .instance_init = virtio_host_initfn, @@ -2692,36 +2824,39 @@ static const TypeInfo virtio_pci_bus_info = { static void virtio_pci_register_types(void) { - type_register_static(&virtio_rng_pci_info); + /* Base types: */ + type_register_static(&virtio_pci_bus_info); + type_register_static(&virtio_pci_info); type_register_static(&virtio_input_pci_info); type_register_static(&virtio_input_hid_pci_info); - type_register_static(&virtio_keyboard_pci_info); - type_register_static(&virtio_mouse_pci_info); - type_register_static(&virtio_tablet_pci_info); + + /* Implementations: */ + virtio_pci_types_register(&virtio_rng_pci_info); + virtio_pci_types_register(&virtio_keyboard_pci_info); + virtio_pci_types_register(&virtio_mouse_pci_info); + virtio_pci_types_register(&virtio_tablet_pci_info); #ifdef CONFIG_LINUX - type_register_static(&virtio_host_pci_info); + virtio_pci_types_register(&virtio_host_pci_info); #endif - type_register_static(&virtio_pci_bus_info); - type_register_static(&virtio_pci_info); 
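/*
 * A minimal sketch of how a device plugs into the virtio_pci_types_register()
 * helper introduced above. The "virtio-foo-pci" names and the VirtIOFooPCI
 * type are hypothetical placeholders, not part of this patch; the real
 * conversions (virtio-blk-pci, virtio-scsi-pci, vhost-user-blk-pci, ...)
 * follow in the hunks below.
 *
 *   static const VirtioPCIDeviceTypeInfo virtio_foo_pci_info = {
 *       .base_name             = "virtio-foo-pci-base",
 *       .generic_name          = "virtio-foo-pci",
 *       .transitional_name     = "virtio-foo-pci-transitional",
 *       .non_transitional_name = "virtio-foo-pci-non-transitional",
 *       .instance_size         = sizeof(VirtIOFooPCI),
 *       .instance_init         = virtio_foo_pci_instance_init,
 *       .class_init            = virtio_foo_pci_class_init,
 *   };
 *
 *   static void virtio_foo_pci_register(void)
 *   {
 *       virtio_pci_types_register(&virtio_foo_pci_info);
 *   }
 *   type_init(virtio_foo_pci_register)
 *
 * The generic "virtio-foo-pci" type keeps the disable-legacy/disable-modern
 * properties and selects the mode from the bus it is plugged into, while the
 * -transitional and -non-transitional variants pin the mode at the type
 * level, as described by the VirtioPCIDeviceTypeInfo comment further down.
 */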
#ifdef CONFIG_VIRTFS - type_register_static(&virtio_9p_pci_info); + virtio_pci_types_register(&virtio_9p_pci_info); #endif - type_register_static(&virtio_blk_pci_info); + virtio_pci_types_register(&virtio_blk_pci_info); #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) - type_register_static(&vhost_user_blk_pci_info); + virtio_pci_types_register(&vhost_user_blk_pci_info); #endif - type_register_static(&virtio_scsi_pci_info); - type_register_static(&virtio_balloon_pci_info); - type_register_static(&virtio_serial_pci_info); - type_register_static(&virtio_net_pci_info); + virtio_pci_types_register(&virtio_scsi_pci_info); + virtio_pci_types_register(&virtio_balloon_pci_info); + virtio_pci_types_register(&virtio_serial_pci_info); + virtio_pci_types_register(&virtio_net_pci_info); #ifdef CONFIG_VHOST_SCSI - type_register_static(&vhost_scsi_pci_info); + virtio_pci_types_register(&vhost_scsi_pci_info); #endif #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) - type_register_static(&vhost_user_scsi_pci_info); + virtio_pci_types_register(&vhost_user_scsi_pci_info); #endif #ifdef CONFIG_VHOST_VSOCK - type_register_static(&vhost_vsock_pci_info); + virtio_pci_types_register(&vhost_vsock_pci_info); #endif } diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 813082b0d7..29b4216107 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -216,7 +216,7 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) /* * virtio-scsi-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci" +#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base" #define VIRTIO_SCSI_PCI(obj) \ OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI) @@ -229,7 +229,7 @@ struct VirtIOSCSIPCI { /* * vhost-scsi-pci: This extends VirtioPCIProxy. */ -#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci" +#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base" #define VHOST_SCSI_PCI(obj) \ OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI) @@ -239,7 +239,7 @@ struct VHostSCSIPCI { }; #endif -#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci" +#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base" #define VHOST_USER_SCSI_PCI(obj) \ OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI) @@ -252,7 +252,7 @@ struct VHostUserSCSIPCI { /* * vhost-user-blk-pci: This extends VirtioPCIProxy. */ -#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" +#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base" #define VHOST_USER_BLK_PCI(obj) \ OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) @@ -265,7 +265,7 @@ struct VHostUserBlkPCI { /* * virtio-blk-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci" +#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base" #define VIRTIO_BLK_PCI(obj) \ OBJECT_CHECK(VirtIOBlkPCI, (obj), TYPE_VIRTIO_BLK_PCI) @@ -277,7 +277,7 @@ struct VirtIOBlkPCI { /* * virtio-balloon-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci" +#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base" #define VIRTIO_BALLOON_PCI(obj) \ OBJECT_CHECK(VirtIOBalloonPCI, (obj), TYPE_VIRTIO_BALLOON_PCI) @@ -289,7 +289,7 @@ struct VirtIOBalloonPCI { /* * virtio-serial-pci: This extends VirtioPCIProxy. 
*/ -#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci" +#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base" #define VIRTIO_SERIAL_PCI(obj) \ OBJECT_CHECK(VirtIOSerialPCI, (obj), TYPE_VIRTIO_SERIAL_PCI) @@ -301,7 +301,7 @@ struct VirtIOSerialPCI { /* * virtio-net-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_NET_PCI "virtio-net-pci" +#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base" #define VIRTIO_NET_PCI(obj) \ OBJECT_CHECK(VirtIONetPCI, (obj), TYPE_VIRTIO_NET_PCI) @@ -316,7 +316,7 @@ struct VirtIONetPCI { #ifdef CONFIG_VIRTFS -#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci" +#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base" #define VIRTIO_9P_PCI(obj) \ OBJECT_CHECK(V9fsPCIState, (obj), TYPE_VIRTIO_9P_PCI) @@ -330,7 +330,7 @@ typedef struct V9fsPCIState { /* * virtio-rng-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci" +#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base" #define VIRTIO_RNG_PCI(obj) \ OBJECT_CHECK(VirtIORngPCI, (obj), TYPE_VIRTIO_RNG_PCI) @@ -365,7 +365,7 @@ struct VirtIOInputHIDPCI { #ifdef CONFIG_LINUX -#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci" +#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base" #define VIRTIO_INPUT_HOST_PCI(obj) \ OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI) @@ -392,7 +392,7 @@ struct VirtIOGPUPCI { /* * vhost-vsock-pci: This extends VirtioPCIProxy. */ -#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci" +#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base" #define VHOST_VSOCK_PCI(obj) \ OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI) @@ -417,4 +417,58 @@ struct VirtIOCryptoPCI { /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 +/* Input for virtio_pci_types_register() */ +typedef struct VirtioPCIDeviceTypeInfo { + /* + * Common base class for the subclasses below. + * + * Required only if transitional_name or non_transitional_name is set. + * + * We need a separate base type instead of making all types + * inherit from generic_name for two reasons: + * 1) generic_name implements INTERFACE_PCIE_DEVICE, but + * transitional_name does not. + * 2) generic_name has the "disable-legacy" and "disable-modern" + * properties, transitional_name and non_transitional name don't. + */ + const char *base_name; + /* + * Generic device type. Optional. + * + * Supports both transitional and non-transitional modes, + * using the disable-legacy and disable-modern properties. + * If disable-legacy=auto, (non-)transitional mode is selected + * depending on the bus where the device is plugged. + * + * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE, + * but PCI Express is supported only in non-transitional mode. + * + * The only type implemented by QEMU 3.1 and older. + */ + const char *generic_name; + /* + * The transitional device type. Optional. + * + * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE. + */ + const char *transitional_name; + /* + * The non-transitional device type. Optional. + * + * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only. + */ + const char *non_transitional_name; + + /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */ + const char *parent; + + /* Same as TypeInfo fields: */ + size_t instance_size; + void (*instance_init)(Object *obj); + void (*class_init)(ObjectClass *klass, void *data); +} VirtioPCIDeviceTypeInfo; + +/* Register virtio-pci type(s). @t must be static. 
*/ +void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + #endif diff --git a/include/elf.h b/include/elf.h index c151164b63..0ac7911b7b 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1338,6 +1338,61 @@ typedef struct { #define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */ #define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ +/* RISC-V relocations. */ +#define R_RISCV_NONE 0 +#define R_RISCV_32 1 +#define R_RISCV_64 2 +#define R_RISCV_RELATIVE 3 +#define R_RISCV_COPY 4 +#define R_RISCV_JUMP_SLOT 5 +#define R_RISCV_TLS_DTPMOD32 6 +#define R_RISCV_TLS_DTPMOD64 7 +#define R_RISCV_TLS_DTPREL32 8 +#define R_RISCV_TLS_DTPREL64 9 +#define R_RISCV_TLS_TPREL32 10 +#define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_BRANCH 16 +#define R_RISCV_JAL 17 +#define R_RISCV_CALL 18 +#define R_RISCV_CALL_PLT 19 +#define R_RISCV_GOT_HI20 20 +#define R_RISCV_TLS_GOT_HI20 21 +#define R_RISCV_TLS_GD_HI20 22 +#define R_RISCV_PCREL_HI20 23 +#define R_RISCV_PCREL_LO12_I 24 +#define R_RISCV_PCREL_LO12_S 25 +#define R_RISCV_HI20 26 +#define R_RISCV_LO12_I 27 +#define R_RISCV_LO12_S 28 +#define R_RISCV_TPREL_HI20 29 +#define R_RISCV_TPREL_LO12_I 30 +#define R_RISCV_TPREL_LO12_S 31 +#define R_RISCV_TPREL_ADD 32 +#define R_RISCV_ADD8 33 +#define R_RISCV_ADD16 34 +#define R_RISCV_ADD32 35 +#define R_RISCV_ADD64 36 +#define R_RISCV_SUB8 37 +#define R_RISCV_SUB16 38 +#define R_RISCV_SUB32 39 +#define R_RISCV_SUB64 40 +#define R_RISCV_GNU_VTINHERIT 41 +#define R_RISCV_GNU_VTENTRY 42 +#define R_RISCV_ALIGN 43 +#define R_RISCV_RVC_BRANCH 44 +#define R_RISCV_RVC_JUMP 45 +#define R_RISCV_RVC_LUI 46 +#define R_RISCV_GPREL_I 47 +#define R_RISCV_GPREL_S 48 +#define R_RISCV_TPREL_I 49 +#define R_RISCV_TPREL_S 50 +#define R_RISCV_RELAX 51 +#define R_RISCV_SUB6 52 +#define R_RISCV_SET6 53 +#define R_RISCV_SET8 54 +#define R_RISCV_SET16 55 +#define R_RISCV_SET32 56 + typedef struct elf32_rel { Elf32_Addr r_offset; Elf32_Word r_info; diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index 276dd5afce..ab4f8b6623 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -108,6 +108,19 @@ #define dh_is_signed_env dh_is_signed_ptr #define dh_is_signed(t) dh_is_signed_##t +#define dh_callflag_i32 0 +#define dh_callflag_s32 0 +#define dh_callflag_int 0 +#define dh_callflag_i64 0 +#define dh_callflag_s64 0 +#define dh_callflag_f16 0 +#define dh_callflag_f32 0 +#define dh_callflag_f64 0 +#define dh_callflag_ptr 0 +#define dh_callflag_void 0 +#define dh_callflag_noreturn TCG_CALL_NO_RETURN +#define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) + #define dh_sizemask(t, n) \ ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h index b3bdb0c399..268e0f804b 100644 --- a/include/exec/helper-tcg.h +++ b/include/exec/helper-tcg.h @@ -11,36 +11,43 @@ #define str(s) #s #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) }, #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = 
str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) }, #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | dh_sizemask(t5, 5) }, #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, diff --git a/include/exec/poison.h b/include/exec/poison.h index 32d53789f8..ecdc83c147 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -79,6 +79,7 @@ #pragma GCC poison CONFIG_MOXIE_DIS #pragma GCC poison CONFIG_NIOS2_DIS #pragma GCC poison CONFIG_PPC_DIS +#pragma GCC poison CONFIG_RISCV_DIS #pragma GCC poison CONFIG_S390_DIS #pragma GCC poison CONFIG_SH4_DIS #pragma GCC poison CONFIG_SPARC_DIS diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index af8e023968..5021cb9e79 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -40,18 +40,13 @@ enum { ACPI_FADT_F_LOW_POWER_S0_IDLE_CAPABLE, }; -struct AcpiRsdpDescriptor { /* Root System Descriptor Pointer */ - uint64_t signature; /* ACPI signature, contains "RSD PTR " */ - uint8_t checksum; /* To make sum of struct == 0 */ - uint8_t oem_id [6]; /* OEM identification */ - uint8_t revision; /* Must be 0 for 1.0, 2 for 2.0 */ - uint32_t rsdt_physical_address; /* 32-bit physical address of RSDT */ - uint32_t length; /* XSDT Length in bytes including hdr */ - uint64_t xsdt_physical_address; /* 64-bit physical address of XSDT */ - uint8_t extended_checksum; /* Checksum of entire table */ - uint8_t reserved [3]; /* Reserved field must be 0 */ -} QEMU_PACKED; -typedef struct AcpiRsdpDescriptor AcpiRsdpDescriptor; +typedef struct AcpiRsdpData { + uint8_t oem_id[6]; /* OEM identification */ + uint8_t revision; /* Must be 0 for 1.0, 2 for 2.0 */ + + unsigned *rsdt_tbl_offset; + unsigned *xsdt_tbl_offset; +} AcpiRsdpData; /* Table structure from Linux kernel (the ACPI tables are under the BSD license) */ diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 6c36903c0a..1a563ad756 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -388,6 +388,8 @@ void acpi_add_table(GArray *table_offsets, GArray *table_data); void acpi_build_tables_init(AcpiBuildTables *tables); void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre); void +build_rsdp(GArray 
*tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data); +void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id); void diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 8a65f99fc8..8bc4a4c01d 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -56,10 +56,15 @@ typedef struct AcpiPciHpState { void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, MemoryRegion *address_space_io, bool bridges_enabled); +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp); void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp); +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp); /* Called on reset */ void acpi_pcihp_reset(AcpiPciHpState *s); diff --git a/include/hw/boards.h b/include/hw/boards.h index f82f28468b..362384815e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -195,6 +195,7 @@ struct MachineClass { const char *hw_version; ram_addr_t default_ram_size; const char *default_cpu_type; + bool default_kernel_irqchip_split; bool option_rom_has_mr; bool rom_file_has_mr; int minimum_page_bits; diff --git a/include/hw/compat.h b/include/hw/compat.h index 70958328fe..3ca85b037c 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,7 +2,15 @@ #define HW_COMPAT_H #define HW_COMPAT_3_1 \ - /* empty */ + {\ + .driver = "pcie-root-port",\ + .property = "x-speed",\ + .value = "2_5",\ + },{\ + .driver = "pcie-root-port",\ + .property = "x-width",\ + .value = "1",\ + }, #define HW_COMPAT_3_0 \ /* empty */ diff --git a/include/hw/smbios/smbios.h b/include/hw/firmware/smbios.h index eeb5a4d7b6..eeb5a4d7b6 100644 --- a/include/hw/smbios/smbios.h +++ b/include/hw/firmware/smbios.h diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index ed4e758273..a321cc9691 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -245,6 +245,7 @@ struct IntelIOMMUState { OnOffAuto intr_eim; /* Toggle for EIM cabability */ bool buggy_eim; /* Force buggy EIM unless eim=off */ uint8_t aw_bits; /* Host/IOVA address width (in bits) */ + bool dma_drain; /* Whether DMA r/w draining enabled */ /* * Protects IOMMU states in general. 
Currently it protects the diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9d29c4b1df..c7c0c944e8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -296,6 +296,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_3_1 \ HW_COMPAT_3_1 \ + {\ + .driver = "intel-iommu",\ + .property = "dma-drain",\ + .value = "off",\ + }, #define PC_COMPAT_3_0 \ HW_COMPAT_3_0 \ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 2b22a579a3..dcd9719a2c 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -74,13 +74,15 @@ typedef struct IEC_Notifier IEC_Notifier; struct X86IOMMUState { SysBusDevice busdev; - bool intr_supported; /* Whether vIOMMU supports IR */ + OnOffAuto intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ bool pt_supported; /* Whether vIOMMU supports pass-through */ IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; +bool x86_iommu_ir_supported(X86IOMMUState *s); + /* Generic IRQ entry information when interrupt remapping is enabled */ struct X86IOMMUIrq { /* Used by both IOAPIC/MSI interrupt remapping */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index e6514bba23..eb12fa112e 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -737,6 +737,19 @@ static inline int pci_is_express(const PCIDevice *d) return d->cap_present & QEMU_PCI_CAP_EXPRESS; } +static inline int pci_is_express_downstream_port(const PCIDevice *d) +{ + uint8_t type; + + if (!pci_is_express(d) || !d->exp.exp_cap) { + return 0; + } + + type = pcie_cap_get_type(d); + + return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT; +} + static inline uint32_t pci_config_size(const PCIDevice *d) { return pci_is_express(d) ? 
PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE; diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index cdff7edfd1..ba488818d2 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -99,6 +99,12 @@ void pci_bridge_reset(DeviceState *qdev); void pci_bridge_initfn(PCIDevice *pci_dev, const char *typename); void pci_bridge_exitfn(PCIDevice *pci_dev); +void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); /* * before qdev initialization(qdev_init()), this function sets bus_name and diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index b71e369703..cd318646a2 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -126,13 +126,16 @@ uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id); void pcie_add_capability(PCIDevice *dev, uint16_t cap_id, uint8_t cap_ver, uint16_t offset, uint16_t size); +void pcie_sync_bridge_lnk(PCIDevice *dev); void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); void pcie_ats_init(PCIDevice *dev, uint16_t offset); -void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); -void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp); +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); #endif /* QEMU_PCIE_H */ diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index 0736014bfd..df242a0caf 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -49,6 +49,10 @@ struct PCIESlot { /* pci express switch port with slot */ uint8_t chassis; uint16_t slot; + + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + QLIST_ENTRY(PCIESlot) next; }; diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index a95522a13b..ad4e7808b8 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -34,10 +34,29 @@ /* PCI_EXP_LINK{CAP, STA} */ /* link speed */ -#define PCI_EXP_LNK_LS_25 1 +typedef enum PCIExpLinkSpeed { + QEMU_PCI_EXP_LNK_2_5GT = 1, + QEMU_PCI_EXP_LNK_5GT, + QEMU_PCI_EXP_LNK_8GT, + QEMU_PCI_EXP_LNK_16GT, +} PCIExpLinkSpeed; + +#define QEMU_PCI_EXP_LNKCAP_MLS(speed) (speed) +#define QEMU_PCI_EXP_LNKSTA_CLS QEMU_PCI_EXP_LNKCAP_MLS + +typedef enum PCIExpLinkWidth { + QEMU_PCI_EXP_LNK_X1 = 1, + QEMU_PCI_EXP_LNK_X2 = 2, + QEMU_PCI_EXP_LNK_X4 = 4, + QEMU_PCI_EXP_LNK_X8 = 8, + QEMU_PCI_EXP_LNK_X12 = 12, + QEMU_PCI_EXP_LNK_X16 = 16, + QEMU_PCI_EXP_LNK_X32 = 32, +} PCIExpLinkWidth; #define PCI_EXP_LNK_MLW_SHIFT ctz32(PCI_EXP_LNKCAP_MLW) -#define PCI_EXP_LNK_MLW_1 (1 << PCI_EXP_LNK_MLW_SHIFT) +#define QEMU_PCI_EXP_LNKCAP_MLW(width) (width << PCI_EXP_LNK_MLW_SHIFT) +#define QEMU_PCI_EXP_LNKSTA_NLW QEMU_PCI_EXP_LNKCAP_MLW /* PCI_EXP_LINKCAP */ #define PCI_EXP_LNKCAP_ASPMS_SHIFT ctz32(PCI_EXP_LNKCAP_ASPMS) diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h index ee19fecf61..18f6ec1cd5 100644 --- a/include/hw/pci/shpc.h +++ b/include/hw/pci/shpc.h @@ -45,10 +45,12 @@ void shpc_free(PCIDevice *dev); void 
shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len); -void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); -void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp); +void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); extern VMStateInfo shpc_vmstate_info; #define SHPC_VMSTATE(_field, _type, _test) \ diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h index 5eb982197d..dad08fe9be 100644 --- a/include/hw/ppc/openpic.h +++ b/include/hw/ppc/openpic.h @@ -20,6 +20,8 @@ enum { OPENPIC_OUTPUT_NB, }; +typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; + #define OPENPIC_MODEL_RAVEN 0 #define OPENPIC_MODEL_FSL_MPIC_20 1 #define OPENPIC_MODEL_FSL_MPIC_42 2 diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 6279711fe8..2c77a8ba88 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry; typedef struct sPAPREventSource sPAPREventSource; typedef struct sPAPRPendingHPT sPAPRPendingHPT; typedef struct ICSState ICSState; +typedef struct sPAPRXive sPAPRXive; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL #define SPAPR_ENTRY_POINT 0x100 @@ -175,6 +176,8 @@ struct sPAPRMachineState { const char *icp_type; int32_t irq_map_nr; unsigned long *irq_map; + sPAPRXive *xive; + sPAPRIrq *irq; bool cmd_line_caps[SPAPR_CAP_NUM]; sPAPRCapabilities def, eff, mig; @@ -450,7 +453,20 @@ struct sPAPRMachineState { #define H_INVALIDATE_PID 0x378 #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 -#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET + +#define H_INT_GET_SOURCE_INFO 0x3A8 +#define H_INT_SET_SOURCE_CONFIG 0x3AC +#define H_INT_GET_SOURCE_CONFIG 0x3B0 +#define H_INT_GET_QUEUE_INFO 0x3B4 +#define H_INT_SET_QUEUE_CONFIG 0x3B8 +#define H_INT_GET_QUEUE_CONFIG 0x3BC +#define H_INT_SET_OS_REPORTING_LINE 0x3C0 +#define H_INT_GET_OS_REPORTING_LINE 0x3C4 +#define H_INT_ESB 0x3C8 +#define H_INT_SYNC 0x3CC +#define H_INT_RESET 0x3D0 + +#define MAX_HCALL_OPCODE H_INT_RESET /* The hcalls above are standardized in PAPR and implemented by pHyp * as well. @@ -737,6 +753,7 @@ int spapr_hpt_shift_for_ramsize(uint64_t ramsize); void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, Error **errp); void spapr_clear_pending_events(sPAPRMachineState *spapr); +int spapr_max_server_number(sPAPRMachineState *spapr); /* CPU and LMB DRC release callbacks. 
*/ void spapr_core_release(DeviceState *dev); @@ -808,5 +825,11 @@ int spapr_caps_post_migration(sPAPRMachineState *spapr); void spapr_check_pagesize(sPAPRMachineState *spapr, hwaddr pagesize, Error **errp); +/* + * XIVE definitions + */ +#define SPAPR_OV5_XIVE_LEGACY 0x0 +#define SPAPR_OV5_XIVE_EXPLOIT 0x40 +#define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h index a467ce696e..b34d5a0038 100644 --- a/include/hw/ppc/spapr_irq.h +++ b/include/hw/ppc/spapr_irq.h @@ -13,6 +13,7 @@ /* * IRQ range offsets per device type */ +#define SPAPR_IRQ_IPI 0x0 #define SPAPR_IRQ_EPOW 0x1000 /* XICS_IRQ_BASE offset */ #define SPAPR_IRQ_HOTPLUG 0x1001 #define SPAPR_IRQ_VIO 0x1100 /* 256 VIO devices */ @@ -32,20 +33,31 @@ void spapr_irq_msi_reset(sPAPRMachineState *spapr); typedef struct sPAPRIrq { uint32_t nr_irqs; uint32_t nr_msis; + uint8_t ov5; void (*init)(sPAPRMachineState *spapr, Error **errp); int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp); void (*free)(sPAPRMachineState *spapr, int irq, int num); qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq); void (*print_info)(sPAPRMachineState *spapr, Monitor *mon); + void (*dt_populate)(sPAPRMachineState *spapr, uint32_t nr_servers, + void *fdt, uint32_t phandle); + Object *(*cpu_intc_create)(sPAPRMachineState *spapr, Object *cpu, + Error **errp); + int (*post_load)(sPAPRMachineState *spapr, int version_id); + void (*reset)(sPAPRMachineState *spapr, Error **errp); } sPAPRIrq; extern sPAPRIrq spapr_irq_xics; extern sPAPRIrq spapr_irq_xics_legacy; +extern sPAPRIrq spapr_irq_xive; +void spapr_irq_init(sPAPRMachineState *spapr, Error **errp); int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp); void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num); qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq); +int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id); +void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp); /* * XICS legacy routines diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h new file mode 100644 index 0000000000..728735dbcf --- /dev/null +++ b/include/hw/ppc/spapr_xive.h @@ -0,0 +1,52 @@ +/* + * QEMU PowerPC sPAPR XIVE interrupt controller model + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. 
+ */ + +#ifndef PPC_SPAPR_XIVE_H +#define PPC_SPAPR_XIVE_H + +#include "hw/ppc/xive.h" + +#define TYPE_SPAPR_XIVE "spapr-xive" +#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE) + +typedef struct sPAPRXive { + XiveRouter parent; + + /* Internal interrupt source for IPIs and virtual devices */ + XiveSource source; + hwaddr vc_base; + + /* END ESB MMIOs */ + XiveENDSource end_source; + hwaddr end_base; + + /* Routing table */ + XiveEAS *eat; + uint32_t nr_irqs; + XiveEND *endt; + uint32_t nr_ends; + + /* TIMA mapping address */ + hwaddr tm_base; + MemoryRegion tm_mmio; +} sPAPRXive; + +bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi); +bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn); +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon); +qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn); + +typedef struct sPAPRMachineState sPAPRMachineState; + +void spapr_xive_hcall_init(sPAPRMachineState *spapr); +void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, + uint32_t phandle); +void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx); + +#endif /* PPC_SPAPR_XIVE_H */ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 9958443d19..14afda198c 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -181,8 +181,6 @@ typedef struct XICSFabricClass { ICPState *(*icp_get)(XICSFabric *xi, int server); } XICSFabricClass; -void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle); - ICPState *xics_icp_get(XICSFabric *xi, int server); /* Internal XICS interfaces */ @@ -204,6 +202,8 @@ void icp_resend(ICPState *ss); typedef struct sPAPRMachineState sPAPRMachineState; +void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, + uint32_t phandle); int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); void xics_spapr_init(sPAPRMachineState *spapr); diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h new file mode 100644 index 0000000000..18cd114eb2 --- /dev/null +++ b/include/hw/ppc/xive.h @@ -0,0 +1,429 @@ +/* + * QEMU PowerPC XIVE interrupt controller model + * + * + * The POWER9 processor comes with a new interrupt controller, called + * XIVE as "eXternal Interrupt Virtualization Engine". + * + * = Overall architecture + * + * + * XIVE Interrupt Controller + * +------------------------------------+ IPIs + * | +---------+ +---------+ +--------+ | +-------+ + * | |VC | |CQ | |PC |----> | CORES | + * | | esb | | | | |----> | | + * | | eas | | Bridge | | tctx |----> | | + * | |SC end | | | | nvt | | | | + * +------+ | +---------+ +----+----+ +--------+ | +-+-+-+-+ + * | RAM | +------------------|-----------------+ | | | + * | | | | | | + * | | | | | | + * | | +--------------------v------------------------v-v-v--+ other + * | <--+ Power Bus +--> chips + * | esb | +---------+-----------------------+------------------+ + * | eas | | | + * | end | +--|------+ | + * | nvt | +----+----+ | +----+----+ + * +------+ |SC | | |SC | + * | | | | | + * | PQ-bits | | | PQ-bits | + * | local |-+ | in VC | + * +---------+ +---------+ + * PCIe NX,NPU,CAPI + * + * SC: Source Controller (aka. IVSE) + * VC: Virtualization Controller (aka. IVRE) + * PC: Presentation Controller (aka. 
IVPE) + * CQ: Common Queue (Bridge) + * + * PQ-bits: 2 bits source state machine (P:pending Q:queued) + * esb: Event State Buffer (Array of PQ bits in an IVSE) + * eas: Event Assignment Structure + * end: Event Notification Descriptor + * nvt: Notification Virtual Target + * tctx: Thread interrupt Context + * + * + * The XIVE IC is composed of three sub-engines : + * + * - Interrupt Virtualization Source Engine (IVSE), or Source + * Controller (SC). These are found in PCI PHBs, in the PSI host + * bridge controller, but also inside the main controller for the + * core IPIs and other sub-chips (NX, CAP, NPU) of the + * chip/processor. They are configured to feed the IVRE with events. + * + * - Interrupt Virtualization Routing Engine (IVRE) or Virtualization + * Controller (VC). Its job is to match an event source with an + * Event Notification Descriptor (END). + * + * - Interrupt Virtualization Presentation Engine (IVPE) or + * Presentation Controller (PC). It maintains the interrupt context + * state of each thread and handles the delivery of the external + * exception to the thread. + * + * In XIVE 1.0, the sub-engines used to be referred as: + * + * SC Source Controller + * VC Virtualization Controller + * PC Presentation Controller + * CQ Common Queue (PowerBUS Bridge) + * + * + * = XIVE internal tables + * + * Each of the sub-engines uses a set of tables to redirect exceptions + * from event sources to CPU threads. + * + * +-------+ + * User or OS | EQ | + * or +------>|entries| + * Hypervisor | | .. | + * Memory | +-------+ + * | ^ + * | | + * +-------------------------------------------------+ + * | | + * Hypervisor +------+ +---+--+ +---+--+ +------+ + * Memory | ESB | | EAT | | ENDT | | NVTT | + * (skiboot) +----+-+ +----+-+ +----+-+ +------+ + * ^ | ^ | ^ | ^ + * | | | | | | | + * +-------------------------------------------------+ + * | | | | | | | + * | | | | | | | + * +----|--|--------|--|--------|--|-+ +-|-----+ +------+ + * | | | | | | | | | | tctx| |Thread| + * IPI or --> | + v + v + v |---| + .. |-----> | + * HW events --> | | | | | | + * IVSE | IVRE | | IVPE | +------+ + * +---------------------------------+ +-------+ + * + * + * + * The IVSE have a 2-bits state machine, P for pending and Q for queued, + * for each source that allows events to be triggered. They are stored in + * an Event State Buffer (ESB) array and can be controlled by MMIOs. + * + * If the event is let through, the IVRE looks up in the Event Assignment + * Structure (EAS) table for an Event Notification Descriptor (END) + * configured for the source. Each Event Notification Descriptor defines + * a notification path to a CPU and an in-memory Event Queue, in which + * will be enqueued an EQ data for the OS to pull. + * + * The IVPE determines if a Notification Virtual Target (NVT) can + * handle the event by scanning the thread contexts of the VCPUs + * dispatched on the processor HW threads. It maintains the state of + * the thread interrupt context (TCTX) of each thread in a NVT table. + * + * = Acronyms + * + * Description In XIVE 1.0, used to be referred as + * + * EAS Event Assignment Structure IVE Interrupt Virt. Entry + * EAT Event Assignment Table IVT Interrupt Virt. Table + * ENDT Event Notif. Descriptor Table EQDT Event Queue Desc. Table + * EQ Event Queue same + * ESB Event State Buffer SBE State Bit Entry + * NVT Notif. Virtual Target VPD Virtual Processor Desc. + * NVTT Notif. Virtual Target Table VPDT Virtual Processor Desc. 
Table + * TCTX Thread interrupt Context + * + * + * Copyright (c) 2017-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + * + */ + +#ifndef PPC_XIVE_H +#define PPC_XIVE_H + +#include "hw/qdev-core.h" +#include "hw/sysbus.h" +#include "hw/ppc/xive_regs.h" + +/* + * XIVE Fabric (Interface between Source and Router) + */ + +typedef struct XiveNotifier { + Object parent; +} XiveNotifier; + +#define TYPE_XIVE_NOTIFIER "xive-notifier" +#define XIVE_NOTIFIER(obj) \ + OBJECT_CHECK(XiveNotifier, (obj), TYPE_XIVE_NOTIFIER) +#define XIVE_NOTIFIER_CLASS(klass) \ + OBJECT_CLASS_CHECK(XiveNotifierClass, (klass), TYPE_XIVE_NOTIFIER) +#define XIVE_NOTIFIER_GET_CLASS(obj) \ + OBJECT_GET_CLASS(XiveNotifierClass, (obj), TYPE_XIVE_NOTIFIER) + +typedef struct XiveNotifierClass { + InterfaceClass parent; + void (*notify)(XiveNotifier *xn, uint32_t lisn); +} XiveNotifierClass; + +/* + * XIVE Interrupt Source + */ + +#define TYPE_XIVE_SOURCE "xive-source" +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE) + +/* + * XIVE Interrupt Source characteristics, which define how the ESB are + * controlled. + */ +#define XIVE_SRC_H_INT_ESB 0x1 /* ESB managed with hcall H_INT_ESB */ +#define XIVE_SRC_STORE_EOI 0x2 /* Store EOI supported */ + +typedef struct XiveSource { + DeviceState parent; + + /* IRQs */ + uint32_t nr_irqs; + qemu_irq *qirqs; + unsigned long *lsi_map; + + /* PQ bits and LSI assertion bit */ + uint8_t *status; + + /* ESB memory region */ + uint64_t esb_flags; + uint32_t esb_shift; + MemoryRegion esb_mmio; + + XiveNotifier *xive; +} XiveSource; + +/* + * ESB MMIO setting. Can be one page, for both source triggering and + * source management, or two different pages. See below for magic + * values. + */ +#define XIVE_ESB_4K 12 /* PSI HB only */ +#define XIVE_ESB_4K_2PAGE 13 +#define XIVE_ESB_64K 16 +#define XIVE_ESB_64K_2PAGE 17 + +static inline bool xive_source_esb_has_2page(XiveSource *xsrc) +{ + return xsrc->esb_shift == XIVE_ESB_64K_2PAGE || + xsrc->esb_shift == XIVE_ESB_4K_2PAGE; +} + +/* The trigger page is always the first/even page */ +static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + return (1ull << xsrc->esb_shift) * srcno; +} + +/* In a two pages ESB MMIO setting, the odd page is for management */ +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) +{ + hwaddr addr = xive_source_esb_page(xsrc, srcno); + + if (xive_source_esb_has_2page(xsrc)) { + addr += (1 << (xsrc->esb_shift - 1)); + } + + return addr; +} + +/* + * Each interrupt source has a 2-bit state machine which can be + * controlled by MMIO. P indicates that an interrupt is pending (has + * been sent to a queue and is waiting for an EOI). Q indicates that + * the interrupt has been triggered while pending. + * + * This acts as a coalescing mechanism in order to guarantee that a + * given interrupt only occurs at most once in a queue. + * + * When doing an EOI, the Q bit will indicate if the interrupt + * needs to be re-triggered. + */ +#define XIVE_STATUS_ASSERTED 0x4 /* Extra bit for LSI */ +#define XIVE_ESB_VAL_P 0x2 +#define XIVE_ESB_VAL_Q 0x1 + +#define XIVE_ESB_RESET 0x0 +#define XIVE_ESB_PENDING XIVE_ESB_VAL_P +#define XIVE_ESB_QUEUED (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q) +#define XIVE_ESB_OFF XIVE_ESB_VAL_Q + +/* + * "magic" Event State Buffer (ESB) MMIO offsets. 
+ * + * The following offsets into the ESB MMIO allow to read or manipulate + * the PQ bits. They must be used with an 8-byte load instruction. + * They all return the previous state of the interrupt (atomically). + * + * Additionally, some ESB pages support doing an EOI via a store and + * some ESBs support doing a trigger via a separate trigger page. + */ +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ + +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno); +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq); + +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, + Monitor *mon); + +static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + return xsrc->qirqs[srcno]; +} + +static inline bool xive_source_irq_is_lsi(XiveSource *xsrc, uint32_t srcno) +{ + assert(srcno < xsrc->nr_irqs); + return test_bit(srcno, xsrc->lsi_map); +} + +static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno, + bool lsi) +{ + assert(srcno < xsrc->nr_irqs); + if (lsi) { + bitmap_set(xsrc->lsi_map, srcno, 1); + } +} + +/* + * XIVE Router + */ + +typedef struct XiveRouter { + SysBusDevice parent; +} XiveRouter; + +#define TYPE_XIVE_ROUTER "xive-router" +#define XIVE_ROUTER(obj) \ + OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER) +#define XIVE_ROUTER_CLASS(klass) \ + OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER) +#define XIVE_ROUTER_GET_CLASS(obj) \ + OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER) + +typedef struct XiveRouterClass { + SysBusDeviceClass parent; + + /* XIVE table accessors */ + int (*get_eas)(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, + XiveEAS *eas); + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); + int (*write_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end, uint8_t word_number); + int (*get_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt); + int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt, uint8_t word_number); +} XiveRouterClass; + +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); + +int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx, + XiveEAS *eas); +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); +int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end, uint8_t word_number); +int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt); +int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx, + XiveNVT *nvt, uint8_t word_number); + + +/* + * XIVE END ESBs + */ + +#define TYPE_XIVE_END_SOURCE "xive-end-source" +#define XIVE_END_SOURCE(obj) \ + OBJECT_CHECK(XiveENDSource, (obj), TYPE_XIVE_END_SOURCE) + +typedef struct XiveENDSource { + DeviceState parent; + + uint32_t nr_ends; + uint8_t block_id; + + /* ESB memory region */ + uint32_t esb_shift; + MemoryRegion esb_mmio; + + XiveRouter *xrtr; +} XiveENDSource; + +/* + * For legacy compatibility, the exceptions define up to 256 different + * priorities. 
P9 implements only 9 levels : 8 active levels [0 - 7] + * and the least favored level 0xFF. + */ +#define XIVE_PRIORITY_MAX 7 + +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); +void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon); + +/* + * XIVE Thread interrupt Management (TM) context + */ + +#define TYPE_XIVE_TCTX "xive-tctx" +#define XIVE_TCTX(obj) OBJECT_CHECK(XiveTCTX, (obj), TYPE_XIVE_TCTX) + +/* + * XIVE Thread interrupt Management register rings : + * + * QW-0 User event-based exception state + * QW-1 O/S OS context for priority management, interrupt acks + * QW-2 Pool hypervisor pool context for virtual processors dispatched + * QW-3 Physical physical thread context and security context + */ +#define XIVE_TM_RING_COUNT 4 +#define XIVE_TM_RING_SIZE 0x10 + +typedef struct XiveTCTX { + DeviceState parent_obj; + + CPUState *cs; + qemu_irq output; + + uint8_t regs[XIVE_TM_RING_COUNT * XIVE_TM_RING_SIZE]; +} XiveTCTX; + +/* + * XIVE Thread Interrupt Management Aera (TIMA) + * + * This region gives access to the registers of the thread interrupt + * management context. It is four page wide, each page providing a + * different view of the registers. The page with the lower offset is + * the most privileged and gives access to the entire context. + */ +#define XIVE_TM_HW_PAGE 0x0 +#define XIVE_TM_HV_PAGE 0x1 +#define XIVE_TM_OS_PAGE 0x2 +#define XIVE_TM_USER_PAGE 0x3 + +extern const MemoryRegionOps xive_tm_ops; + +void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon); +Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp); + +static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx) +{ + return (nvt_blk << 19) | nvt_idx; +} + +#endif /* PPC_XIVE_H */ diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h new file mode 100644 index 0000000000..bf36678a24 --- /dev/null +++ b/include/hw/ppc/xive_regs.h @@ -0,0 +1,235 @@ +/* + * QEMU PowerPC XIVE internal structure definitions + * + * + * The XIVE structures are accessed by the HW and their format is + * architected to be big-endian. Some macros are provided to ease + * access to the different fields. + * + * + * Copyright (c) 2016-2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PPC_XIVE_REGS_H +#define PPC_XIVE_REGS_H + +/* + * Interrupt source number encoding on PowerBUS + */ +#define XIVE_SRCNO_BLOCK(srcno) (((srcno) >> 28) & 0xf) +#define XIVE_SRCNO_INDEX(srcno) ((srcno) & 0x0fffffff) +#define XIVE_SRCNO(blk, idx) ((uint32_t)(blk) << 28 | (idx)) + +#define TM_SHIFT 16 + +/* TM register offsets */ +#define TM_QW0_USER 0x000 /* All rings */ +#define TM_QW1_OS 0x010 /* Ring 0..2 */ +#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */ +#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */ + +/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */ +#define TM_NSR 0x0 /* + + - + */ +#define TM_CPPR 0x1 /* - + - + */ +#define TM_IPB 0x2 /* - + + + */ +#define TM_LSMFB 0x3 /* - + + + */ +#define TM_ACK_CNT 0x4 /* - + - - */ +#define TM_INC 0x5 /* - + - + */ +#define TM_AGE 0x6 /* - + - + */ +#define TM_PIPR 0x7 /* - + - + */ + +#define TM_WORD0 0x0 +#define TM_WORD1 0x4 + +/* + * QW word 2 contains the valid bit at the top and other fields + * depending on the QW. + */ +#define TM_WORD2 0x8 +#define TM_QW0W2_VU PPC_BIT32(0) +#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1, 31) /* XX 2,31 ? 
*/ +#define TM_QW1W2_VO PPC_BIT32(0) +#define TM_QW1W2_OS_CAM PPC_BITMASK32(8, 31) +#define TM_QW2W2_VP PPC_BIT32(0) +#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8, 31) +#define TM_QW3W2_VT PPC_BIT32(0) +#define TM_QW3W2_LP PPC_BIT32(6) +#define TM_QW3W2_LE PPC_BIT32(7) +#define TM_QW3W2_T PPC_BIT32(31) + +/* + * In addition to normal loads to "peek" and writes (only when invalid) + * using 4 and 8 bytes accesses, the above registers support these + * "special" byte operations: + * + * - Byte load from QW0[NSR] - User level NSR (EBB) + * - Byte store to QW0[NSR] - User level NSR (EBB) + * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access + * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0 + * otherwise VT||0000000 + * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present) + * + * Then we have all these "special" CI ops at these offset that trigger + * all sorts of side effects: + */ +#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/ +#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */ +#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */ +#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user + * context */ +#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */ +#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS + * context to reg */ +#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool + * context to reg*/ +#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */ +#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd + * line */ +#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */ +#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even + * line */ +#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */ +/* XXX more... */ + +/* NSR fields for the various QW ack types */ +#define TM_QW0_NSR_EB PPC_BIT8(0) +#define TM_QW1_NSR_EO PPC_BIT8(0) +#define TM_QW3_NSR_HE PPC_BITMASK8(0, 1) +#define TM_QW3_NSR_HE_NONE 0 +#define TM_QW3_NSR_HE_POOL 1 +#define TM_QW3_NSR_HE_PHYS 2 +#define TM_QW3_NSR_HE_LSI 3 +#define TM_QW3_NSR_I PPC_BIT8(2) +#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3, 7) + +/* + * EAS (Event Assignment Structure) + * + * One per interrupt source. 
Targets an interrupt to a given Event + * Notification Descriptor (END) and provides the corresponding + * logical interrupt number (END data) + */ +typedef struct XiveEAS { + /* + * Use a single 64-bit definition to make it easier to perform + * atomic updates + */ + uint64_t w; +#define EAS_VALID PPC_BIT(0) +#define EAS_END_BLOCK PPC_BITMASK(4, 7) /* Destination END block# */ +#define EAS_END_INDEX PPC_BITMASK(8, 31) /* Destination END index */ +#define EAS_MASKED PPC_BIT(32) /* Masked */ +#define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ +} XiveEAS; + +#define xive_eas_is_valid(eas) (be64_to_cpu((eas)->w) & EAS_VALID) +#define xive_eas_is_masked(eas) (be64_to_cpu((eas)->w) & EAS_MASKED) + +static inline uint64_t xive_get_field64(uint64_t mask, uint64_t word) +{ + return (be64_to_cpu(word) & mask) >> ctz64(mask); +} + +static inline uint64_t xive_set_field64(uint64_t mask, uint64_t word, + uint64_t value) +{ + uint64_t tmp = + (be64_to_cpu(word) & ~mask) | ((value << ctz64(mask)) & mask); + return cpu_to_be64(tmp); +} + +static inline uint32_t xive_get_field32(uint32_t mask, uint32_t word) +{ + return (be32_to_cpu(word) & mask) >> ctz32(mask); +} + +static inline uint32_t xive_set_field32(uint32_t mask, uint32_t word, + uint32_t value) +{ + uint32_t tmp = + (be32_to_cpu(word) & ~mask) | ((value << ctz32(mask)) & mask); + return cpu_to_be32(tmp); +} + +/* Event Notification Descriptor (END) */ +typedef struct XiveEND { + uint32_t w0; +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ +#define END_W0_QSIZE PPC_BITMASK32(12, 15) +#define END_W0_SW0 PPC_BIT32(16) +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ +#define END_QSIZE_4K 0 +#define END_QSIZE_64K 4 +#define END_W0_HWDEP PPC_BITMASK32(24, 31) + uint32_t w1; +#define END_W1_ESn PPC_BITMASK32(0, 1) +#define END_W1_ESn_P PPC_BIT32(0) +#define END_W1_ESn_Q PPC_BIT32(1) +#define END_W1_ESe PPC_BITMASK32(2, 3) +#define END_W1_ESe_P PPC_BIT32(2) +#define END_W1_ESe_Q PPC_BIT32(3) +#define END_W1_GENERATION PPC_BIT32(9) +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) + uint32_t w2; +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) + uint32_t w3; +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) + uint32_t w4; +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) + uint32_t w5; +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) + uint32_t w6; +#define END_W6_FORMAT_BIT PPC_BIT32(8) +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) + uint32_t w7; +#define END_W7_F0_IGNORE PPC_BIT32(0) +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) +#define END_W7_F1_WAKEZ PPC_BIT32(0) +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) +} XiveEND; + +#define xive_end_is_valid(end) (be32_to_cpu((end)->w0) & END_W0_VALID) +#define xive_end_is_enqueue(end) (be32_to_cpu((end)->w0) & END_W0_ENQUEUE) +#define xive_end_is_notify(end) (be32_to_cpu((end)->w0) & END_W0_UCOND_NOTIFY) +#define xive_end_is_backlog(end) (be32_to_cpu((end)->w0) & END_W0_BACKLOG) +#define 
xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL) + +/* Notification Virtual Target (NVT) */ +typedef struct XiveNVT { + uint32_t w0; +#define NVT_W0_VALID PPC_BIT32(0) + uint32_t w1; + uint32_t w2; + uint32_t w3; + uint32_t w4; + uint32_t w5; + uint32_t w6; + uint32_t w7; + uint32_t w8; +#define NVT_W8_GRP_VALID PPC_BIT32(0) + uint32_t w9; + uint32_t wa; + uint32_t wb; + uint32_t wc; + uint32_t wd; + uint32_t we; + uint32_t wf; +} XiveNVT; + +#define xive_nvt_is_valid(nvt) (be32_to_cpu((nvt)->w0) & NVT_W0_VALID) + +#endif /* PPC_XIVE_REGS_H */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 3ab9cd2eb6..b6758c852e 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -36,6 +36,8 @@ extern const PropertyInfo qdev_prop_uuid; extern const PropertyInfo qdev_prop_arraylen; extern const PropertyInfo qdev_prop_link; extern const PropertyInfo qdev_prop_off_auto_pcibar; +extern const PropertyInfo qdev_prop_pcie_link_speed; +extern const PropertyInfo qdev_prop_pcie_link_width; #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \ .name = (_name), \ @@ -217,6 +219,12 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar; #define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \ OffAutoPCIBAR) +#define DEFINE_PROP_PCIE_LINK_SPEED(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_speed, \ + PCIExpLinkSpeed) +#define DEFINE_PROP_PCIE_LINK_WIDTH(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_width, \ + PCIExpLinkWidth) #define DEFINE_PROP_UUID(_name, _state, _field) { \ .name = (_name), \ diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h index cbd7552e7a..47ef9de869 100644 --- a/include/hw/s390x/tod.h +++ b/include/hw/s390x/tod.h @@ -56,7 +56,7 @@ typedef struct S390TODClass { /* Converts ns to s390's clock format */ static inline uint64_t time2tod(uint64_t ns) { - return (ns << 9) / 125 + (((ns & 0xff10000000000000ull) / 125) << 9); + return (ns << 9) / 125 + (((ns & 0xff80000000000000ull) / 125) << 9); } /* Converts s390's clock format to ns */ diff --git a/include/hw/smbios/ipmi.h b/include/hw/smbios/ipmi.h deleted file mode 100644 index 1c9aae38f2..0000000000 --- a/include/hw/smbios/ipmi.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * IPMI SMBIOS firmware handling - * - * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- */ - -#ifndef QEMU_SMBIOS_IPMI_H -#define QEMU_SMBIOS_IPMI_H - -void smbios_build_type_38_table(void); - -#endif /* QEMU_SMBIOS_IPMI_H */ diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h index ce7e7b057f..1f057c2b9e 100644 --- a/include/qemu/vfio-helpers.h +++ b/include/qemu/vfio-helpers.h @@ -12,7 +12,6 @@ #ifndef QEMU_VFIO_HELPERS_H #define QEMU_VFIO_HELPERS_H -#include "qemu/typedefs.h" typedef struct QEMUVFIOState QEMUVFIOState; diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index c8efdeb376..e0d15da937 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -62,6 +62,7 @@ void qemu_register_wakeup_support(void); void qemu_system_shutdown_request(ShutdownCause reason); void qemu_system_powerdown_request(void); void qemu_register_powerdown_notifier(Notifier *notifier); +void qemu_register_shutdown_notifier(Notifier *notifier); void qemu_system_debug_request(void); void qemu_system_vmstop_request(RunState reason); void qemu_system_vmstop_request_prepare(void); diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h index 89592ae4fa..d200ee01d0 100644 --- a/include/sysemu/whpx.h +++ b/include/sysemu/whpx.h @@ -13,7 +13,6 @@ #ifndef QEMU_WHPX_H #define QEMU_WHPX_H -#include "config-host.h" #include "qemu-common.h" int whpx_init_vcpu(CPUState *cpu); diff --git a/linux-user/host/riscv32/hostdep.h b/linux-user/host/riscv32/hostdep.h new file mode 100644 index 0000000000..adf9edbf2d --- /dev/null +++ b/linux-user/host/riscv32/hostdep.h @@ -0,0 +1,11 @@ +/* + * hostdep.h : things which are dependent on the host architecture + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef RISCV32_HOSTDEP_H +#define RISCV32_HOSTDEP_H + +#endif diff --git a/linux-user/host/riscv64/hostdep.h b/linux-user/host/riscv64/hostdep.h new file mode 100644 index 0000000000..865f0fb9ff --- /dev/null +++ b/linux-user/host/riscv64/hostdep.h @@ -0,0 +1,34 @@ +/* + * hostdep.h : things which are dependent on the host architecture + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef RISCV64_HOSTDEP_H +#define RISCV64_HOSTDEP_H + +/* We have a safe-syscall.inc.S */ +#define HAVE_SAFE_SYSCALL + +#ifndef __ASSEMBLER__ + +/* These are defined by the safe-syscall.inc.S file */ +extern char safe_syscall_start[]; +extern char safe_syscall_end[]; + +/* Adjust the signal context to rewind out of safe-syscall if we're in it */ +static inline void rewind_if_in_safe_syscall(void *puc) +{ + ucontext_t *uc = puc; + unsigned long *pcreg = &uc->uc_mcontext.__gregs[REG_PC]; + + if (*pcreg > (uintptr_t)safe_syscall_start + && *pcreg < (uintptr_t)safe_syscall_end) { + *pcreg = (uintptr_t)safe_syscall_start; + } +} + +#endif /* __ASSEMBLER__ */ + +#endif diff --git a/linux-user/host/riscv64/safe-syscall.inc.S b/linux-user/host/riscv64/safe-syscall.inc.S new file mode 100644 index 0000000000..9ca3fbfd1e --- /dev/null +++ b/linux-user/host/riscv64/safe-syscall.inc.S @@ -0,0 +1,77 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by linux-user/safe-syscall.S + * + * Written by Richard Henderson <rth@twiddle.net> + * Copyright (C) 2018 Linaro, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + .type safe_syscall_start, @function + .type safe_syscall_end, @function + + /* + * This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + * We return a long which is the syscall's return value, which + * may be negative-errno on failure. Conversion to the + * -1-and-errno-set convention is done by the calling wrapper. + */ +safe_syscall_base: + .cfi_startproc + /* + * The syscall calling convention is nearly the same as C: + * we enter with a0 == *signal_pending + * a1 == syscall number + * a2 ... a7 == syscall arguments + * and return the result in a0 + * and the syscall instruction needs + * a7 == syscall number + * a0 ... a5 == syscall arguments + * and returns the result in a0 + * Shuffle everything around appropriately. + */ + mv t0, a0 /* signal_pending pointer */ + mv t1, a1 /* syscall number */ + mv a0, a2 /* syscall arguments */ + mv a1, a3 + mv a2, a4 + mv a3, a5 + mv a4, a6 + mv a5, a7 + mv a7, t1 + + /* + * This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* If signal_pending is non-zero, don't do the call */ + lw t1, 0(t0) + bnez t1, 0f + scall +safe_syscall_end: + /* code path for having successfully executed the syscall */ + ret + +0: + /* code path when we didn't execute the syscall */ + li a0, -TARGET_ERESTARTSYS + ret + .cfi_endproc + + .size safe_syscall_base, .-safe_syscall_base diff --git a/qapi/common.json b/qapi/common.json index 021174f04e..99d313ef3b 100644 --- a/qapi/common.json +++ b/qapi/common.json @@ -128,6 +128,48 @@ 'data': [ 'off', 'auto', 'bar0', 'bar1', 'bar2', 'bar3', 'bar4', 'bar5' ] } ## +# @PCIELinkSpeed: +# +# An enumeration of PCIe link speeds in units of GT/s +# +# @2_5: 2.5GT/s +# +# @5: 5.0GT/s +# +# @8: 8.0GT/s +# +# @16: 16.0GT/s +# +# Since: 4.0 +## +{ 'enum': 'PCIELinkSpeed', + 'data': [ '2_5', '5', '8', '16' ] } + +## +# @PCIELinkWidth: +# +# An enumeration of PCIe link width +# +# @1: x1 +# +# @2: x2 +# +# @4: x4 +# +# @8: x8 +# +# @12: x12 +# +# @16: x16 +# +# @32: x32 +# +# Since: 4.0 +## +{ 'enum': 'PCIELinkWidth', + 'data': [ '1', '2', '4', '8', '12', '16', '32' ] } + +## # @SysEmuTarget: # # The comprehensive enumeration of QEMU system emulation ("softmmu") diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json index 65b6dc2f6f..3bbdfcee84 100644 --- a/qapi/qapi-schema.json +++ b/qapi/qapi-schema.json @@ -86,6 +86,7 @@ { 'include': 'char.json' } { 'include': 'job.json' } { 'include': 'net.json' } +{ 'include': 'rdma.json' } { 'include': 'rocker.json' } { 'include': 'tpm.json' } { 'include': 'ui.json' } diff --git a/qapi/rdma.json b/qapi/rdma.json new file mode 100644 index 0000000000..b58105b1b6 --- /dev/null +++ b/qapi/rdma.json @@ -0,0 +1,38 @@ +# -*- Mode: Python -*- +# + +## +# = RDMA device +## + +## +# @RDMA_GID_STATUS_CHANGED: +# +# Emitted when guest driver adds/deletes GID to/from 
device +# +# @netdev: RoCE Network Device name +# +# @gid-status: Add or delete indication +# +# @subnet-prefix: Subnet Prefix +# +# @interface-id : Interface ID +# +# Since: 4.0 +# +# Example: +# +# <- {"timestamp": {"seconds": 1541579657, "microseconds": 986760}, +# "event": "RDMA_GID_STATUS_CHANGED", +# "data": +# {"netdev": "bridge0", +# "interface-id": 15880512517475447892, +# "gid-status": true, +# "subnet-prefix": 33022}} +# +## +{ 'event': 'RDMA_GID_STATUS_CHANGED', + 'data': { 'netdev' : 'str', + 'gid-status' : 'bool', + 'subnet-prefix' : 'uint64', + 'interface-id' : 'uint64' } } diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi index e362d37225..c3735b698e 100644 --- a/qemu-deprecated.texi +++ b/qemu-deprecated.texi @@ -134,7 +134,7 @@ their usecases. @section System emulator machines -@subsection pc-0.10 and pc-0.11 (since 3.0) +@subsection pc-0.12, pc-0.13, pc-0.14 and pc-0.15 (since 4.0) These machine types are very old and likely can not be used for live migration from old QEMU versions anymore. A newer machine type should be used instead. diff --git a/target/i386/sev.c b/target/i386/sev.c index 2395171acf..20b2d325d8 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -11,12 +11,13 @@ * */ +#include "qemu/osdep.h" + #include <linux/kvm.h> #include <linux/psp-sev.h> #include <sys/ioctl.h> -#include "qemu/osdep.h" #include "qapi/error.h" #include "qom/object_interfaces.h" #include "qemu/base64.h" diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h index d8d3485976..4ae3cc8fa5 100644 --- a/target/i386/whp-dispatch.h +++ b/target/i386/whp-dispatch.h @@ -1,5 +1,4 @@ #include "windows.h" -#include <stdbool.h> #include <WinHvPlatform.h> #include <WinHvEmulation.h> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index ab68abe8a2..d5f99f1fc7 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -70,26 +70,14 @@ #define PPC_ELF_MACHINE EM_PPC #endif -#define PPC_BIT(bit) (0x8000000000000000UL >> (bit)) -#define PPC_BIT32(bit) (0x80000000UL >> (bit)) -#define PPC_BIT8(bit) (0x80UL >> (bit)) +#define PPC_BIT(bit) (0x8000000000000000ULL >> (bit)) +#define PPC_BIT32(bit) (0x80000000 >> (bit)) +#define PPC_BIT8(bit) (0x80 >> (bit)) #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) #define PPC_BITMASK32(bs, be) ((PPC_BIT32(bs) - PPC_BIT32(be)) | \ PPC_BIT32(bs)) #define PPC_BITMASK8(bs, be) ((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs)) -#if HOST_LONG_BITS == 32 -# define MASK_TO_LSH(m) (__builtin_ffsll(m) - 1) -#elif HOST_LONG_BITS == 64 -# define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1) -#else -# error Unknown sizeof long -#endif - -#define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m)) -#define SETFIELD(m, v, val) \ - (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m))) - /*****************************************************************************/ /* Exception vectors definitions */ enum { diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 2b37910248..96894ab9a8 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -849,7 +849,7 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0, static inline void gen_op_arith_compute_ca32(DisasContext *ctx, TCGv res, TCGv arg0, TCGv arg1, - int sub) + TCGv ca32, int sub) { TCGv t0; @@ -864,13 +864,14 @@ static inline void gen_op_arith_compute_ca32(DisasContext *ctx, tcg_gen_xor_tl(t0, arg0, arg1); } tcg_gen_xor_tl(t0, t0, res); - tcg_gen_extract_tl(cpu_ca32, t0, 32, 1); + tcg_gen_extract_tl(ca32, t0, 32, 1); tcg_temp_free(t0); } /* 
Common add function */ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, bool add_ca, bool compute_ca, + TCGv arg2, TCGv ca, TCGv ca32, + bool add_ca, bool compute_ca, bool compute_ov, bool compute_rc0) { TCGv t0 = ret; @@ -888,29 +889,29 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_xor_tl(t1, arg1, arg2); /* add without carry */ tcg_gen_add_tl(t0, arg1, arg2); if (add_ca) { - tcg_gen_add_tl(t0, t0, cpu_ca); + tcg_gen_add_tl(t0, t0, ca); } - tcg_gen_xor_tl(cpu_ca, t0, t1); /* bits changed w/ carry */ + tcg_gen_xor_tl(ca, t0, t1); /* bits changed w/ carry */ tcg_temp_free(t1); - tcg_gen_extract_tl(cpu_ca, cpu_ca, 32, 1); + tcg_gen_extract_tl(ca, ca, 32, 1); if (is_isa300(ctx)) { - tcg_gen_mov_tl(cpu_ca32, cpu_ca); + tcg_gen_mov_tl(ca32, ca); } } else { TCGv zero = tcg_const_tl(0); if (add_ca) { - tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, cpu_ca, zero); - tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, arg2, zero); + tcg_gen_add2_tl(t0, ca, arg1, zero, ca, zero); + tcg_gen_add2_tl(t0, ca, t0, ca, arg2, zero); } else { - tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero); + tcg_gen_add2_tl(t0, ca, arg1, zero, arg2, zero); } - gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0); + gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, ca32, 0); tcg_temp_free(zero); } } else { tcg_gen_add_tl(t0, arg1, arg2); if (add_ca) { - tcg_gen_add_tl(t0, t0, cpu_ca); + tcg_gen_add_tl(t0, t0, ca); } } @@ -927,40 +928,44 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } } /* Add functions with two operands */ -#define GEN_INT_ARITH_ADD(name, opc3, add_ca, compute_ca, compute_ov) \ +#define GEN_INT_ARITH_ADD(name, opc3, ca, add_ca, compute_ca, compute_ov) \ static void glue(gen_, name)(DisasContext *ctx) \ { \ gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ + ca, glue(ca, 32), \ add_ca, compute_ca, compute_ov, Rc(ctx->opcode)); \ } /* Add functions with one operand and one immediate */ -#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, \ +#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, ca, \ add_ca, compute_ca, compute_ov) \ static void glue(gen_, name)(DisasContext *ctx) \ { \ TCGv t0 = tcg_const_tl(const_val); \ gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ cpu_gpr[rA(ctx->opcode)], t0, \ + ca, glue(ca, 32), \ add_ca, compute_ca, compute_ov, Rc(ctx->opcode)); \ tcg_temp_free(t0); \ } /* add add. addo addo. */ -GEN_INT_ARITH_ADD(add, 0x08, 0, 0, 0) -GEN_INT_ARITH_ADD(addo, 0x18, 0, 0, 1) +GEN_INT_ARITH_ADD(add, 0x08, cpu_ca, 0, 0, 0) +GEN_INT_ARITH_ADD(addo, 0x18, cpu_ca, 0, 0, 1) /* addc addc. addco addco. */ -GEN_INT_ARITH_ADD(addc, 0x00, 0, 1, 0) -GEN_INT_ARITH_ADD(addco, 0x10, 0, 1, 1) +GEN_INT_ARITH_ADD(addc, 0x00, cpu_ca, 0, 1, 0) +GEN_INT_ARITH_ADD(addco, 0x10, cpu_ca, 0, 1, 1) /* adde adde. addeo addeo. */ -GEN_INT_ARITH_ADD(adde, 0x04, 1, 1, 0) -GEN_INT_ARITH_ADD(addeo, 0x14, 1, 1, 1) +GEN_INT_ARITH_ADD(adde, 0x04, cpu_ca, 1, 1, 0) +GEN_INT_ARITH_ADD(addeo, 0x14, cpu_ca, 1, 1, 1) /* addme addme. addmeo addmeo. */ -GEN_INT_ARITH_ADD_CONST(addme, 0x07, -1LL, 1, 1, 0) -GEN_INT_ARITH_ADD_CONST(addmeo, 0x17, -1LL, 1, 1, 1) +GEN_INT_ARITH_ADD_CONST(addme, 0x07, -1LL, cpu_ca, 1, 1, 0) +GEN_INT_ARITH_ADD_CONST(addmeo, 0x17, -1LL, cpu_ca, 1, 1, 1) +/* addex */ +GEN_INT_ARITH_ADD(addex, 0x05, cpu_ov, 1, 1, 0); /* addze addze. 
addzeo addzeo.*/ -GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, 1, 1, 0) -GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, 1, 1, 1) +GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, cpu_ca, 1, 1, 0) +GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, cpu_ca, 1, 1, 1) /* addi */ static void gen_addi(DisasContext *ctx) { @@ -979,7 +984,7 @@ static inline void gen_op_addic(DisasContext *ctx, bool compute_rc0) { TCGv c = tcg_const_tl(SIMM(ctx->opcode)); gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - c, 0, 1, 0, compute_rc0); + c, cpu_ca, cpu_ca32, 0, 1, 0, compute_rc0); tcg_temp_free(c); } @@ -1432,13 +1437,13 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, zero = tcg_const_tl(0); tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero); tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero); - gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0); + gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, cpu_ca32, 0); tcg_temp_free(zero); tcg_temp_free(inv1); } else { tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1); tcg_gen_sub_tl(t0, arg2, arg1); - gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1); + gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, cpu_ca32, 1); } } else if (add_ca) { /* Since we're ignoring carry-out, we can simplify the @@ -7087,6 +7092,7 @@ GEN_INT_ARITH_ADD(adde, 0x04, 1, 1, 0) GEN_INT_ARITH_ADD(addeo, 0x14, 1, 1, 1) GEN_INT_ARITH_ADD_CONST(addme, 0x07, -1LL, 1, 1, 0) GEN_INT_ARITH_ADD_CONST(addmeo, 0x17, -1LL, 1, 1, 1) +GEN_HANDLER_E(addex, 0x1F, 0x0A, 0x05, 0x00000000, PPC_NONE, PPC2_ISA300), GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, 1, 1, 0) GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, 1, 1, 1) diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c index 139f80cb24..84e05fb827 100644 --- a/target/ppc/translate/vmx-ops.inc.c +++ b/target/ppc/translate/vmx-ops.inc.c @@ -143,7 +143,7 @@ GEN_VXFORM(vaddsws, 0, 14), GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM(vsubuws, 0, 26), -GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_NONE, PPC2_ISA300), +GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, PPC2_ISA300), GEN_VXFORM(vsubshs, 0, 29), GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM_207(vadduqm, 0, 4), diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 168d0cec28..03f1d34a97 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -9081,13 +9081,13 @@ static void init_ppc_proc(PowerPCCPU *cpu) nb_tlb *= 2; switch (env->tlb_type) { case TLB_6XX: - env->tlb.tlb6 = g_malloc0(nb_tlb * sizeof(ppc6xx_tlb_t)); + env->tlb.tlb6 = g_new0(ppc6xx_tlb_t, nb_tlb); break; case TLB_EMB: - env->tlb.tlbe = g_malloc0(nb_tlb * sizeof(ppcemb_tlb_t)); + env->tlb.tlbe = g_new0(ppcemb_tlb_t, nb_tlb); break; case TLB_MAS: - env->tlb.tlbm = g_malloc0(nb_tlb * sizeof(ppcmas_tlb_t)); + env->tlb.tlbm = g_new0(ppcmas_tlb_t, nb_tlb); break; } /* Pre-compute some useful values */ diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index fdb87d8d82..01b45ca0ae 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -17,7 +17,6 @@ */ #include "qemu/osdep.h" -#include <stdlib.h> #include "cpu.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h new file mode 100644 index 0000000000..60918cacb4 --- /dev/null +++ b/tcg/riscv/tcg-target.h @@ -0,0 +1,177 @@ +/* + * Tiny Code Generator for 
QEMU + * + * Copyright (c) 2018 SiFive, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef RISCV_TCG_TARGET_H +#define RISCV_TCG_TARGET_H + +#if __riscv_xlen == 32 +# define TCG_TARGET_REG_BITS 32 +#elif __riscv_xlen == 64 +# define TCG_TARGET_REG_BITS 64 +#endif + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20 +#define TCG_TARGET_NB_REGS 32 + +typedef enum { + TCG_REG_ZERO, + TCG_REG_RA, + TCG_REG_SP, + TCG_REG_GP, + TCG_REG_TP, + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_S10, + TCG_REG_S11, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + + /* aliases */ + TCG_AREG0 = TCG_REG_S0, + TCG_GUEST_BASE_REG = TCG_REG_S1, + TCG_REG_TMP0 = TCG_REG_T6, + TCG_REG_TMP1 = TCG_REG_T5, + TCG_REG_TMP2 = TCG_REG_T4, +} TCGReg; + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_ALIGN_ARGS 1 +#define TCG_TARGET_CALL_STACK_OFFSET 0 + +/* optional instructions */ +#define TCG_TARGET_HAS_goto_ptr 1 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 +#define TCG_TARGET_HAS_div2_i32 0 +#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_extract_i32 0 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 (TCG_TARGET_REG_BITS == 32) +#define TCG_TARGET_HAS_mulsh_i32 (TCG_TARGET_REG_BITS == 32) +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_bswap16_i32 0 +#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_neg_i32 1 +#define TCG_TARGET_HAS_andc_i32 0 +#define TCG_TARGET_HAS_orc_i32 0 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_direct_jump 0 +#define TCG_TARGET_HAS_brcond2 1 +#define 
TCG_TARGET_HAS_setcond2 1 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 1 +#define TCG_TARGET_HAS_div2_i64 0 +#define TCG_TARGET_HAS_rot_i64 0 +#define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_extract_i64 0 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap16_i64 0 +#define TCG_TARGET_HAS_bswap32_i64 0 +#define TCG_TARGET_HAS_bswap64_i64 0 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_neg_i64 1 +#define TCG_TARGET_HAS_andc_i64 0 +#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 +#endif + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + __builtin___clear_cache((char *)start, (char *)stop); +} + +/* not defined -- call should be eliminated at compile time */ +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); + +#define TCG_TARGET_DEFAULT_MO (0) + +#ifdef CONFIG_SOFTMMU +#define TCG_TARGET_NEED_LDST_LABELS +#endif +#define TCG_TARGET_NEED_POOL_LABELS + +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 + +#endif diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c new file mode 100644 index 0000000000..6cf8de32b5 --- /dev/null +++ b/tcg/riscv/tcg-target.inc.c @@ -0,0 +1,1949 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2018 SiFive, Inc + * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> + * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> + * Copyright (c) 2008 Fabrice Bellard + * + * Based on i386/tcg-target.c and mips/tcg-target.c + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "tcg-pool.inc.c" + +#ifdef CONFIG_DEBUG_TCG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "zero", + "ra", + "sp", + "gp", + "tp", + "t0", + "t1", + "t2", + "s0", + "s1", + "a0", + "a1", + "a2", + "a3", + "a4", + "a5", + "a6", + "a7", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "s8", + "s9", + "s10", + "s11", + "t3", + "t4", + "t5", + "t6" +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + /* Call saved registers */ + /* TCG_REG_S0 reservered for TCG_AREG0 */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_S10, + TCG_REG_S11, + + /* Call clobbered registers */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + + /* Argument registers */ + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_A0, + TCG_REG_A1, +}; + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_S12 0x200 +#define TCG_CT_CONST_N12 0x400 +#define TCG_CT_CONST_M12 0x800 + +static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) +{ + if (TCG_TARGET_REG_BITS == 32) { + return sextract32(val, pos, len); + } else { + return sextract64(val, pos, len); + } +} + +/* parse target specific constraints */ +static const char *target_parse_constraint(TCGArgConstraint *ct, + const char *ct_str, TCGType type) +{ + switch (*ct_str++) { + case 'r': + ct->ct |= TCG_CT_REG; + ct->u.regs = 0xffffffff; + break; + case 'L': + /* qemu_ld/qemu_st constraint */ + ct->ct |= TCG_CT_REG; + ct->u.regs = 0xffffffff; + /* qemu_ld/qemu_st uses TCG_REG_TMP0 */ +#if defined(CONFIG_SOFTMMU) + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]); +#endif + break; + case 'I': + ct->ct |= TCG_CT_CONST_S12; + break; + case 'N': + ct->ct |= TCG_CT_CONST_N12; + break; + case 'M': + ct->ct |= TCG_CT_CONST_M12; + break; + case 'Z': + /* we can use a zero immediate as a zero register argument. 
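+        (The RISC-V x0/"zero" register always reads as 0, so a constant 0 operand can simply be emitted using TCG_REG_ZERO.)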
*/ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return NULL; + } + return ct_str; +} + +/* test if a constant matches the constraint */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } + if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { + return 1; + } + if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { + return 1; + } + if ((ct & TCG_CT_CONST_M12) && val >= -0xfff && val <= 0xfff) { + return 1; + } + return 0; +} + +/* + * RISC-V Base ISA opcodes (IM) + */ + +typedef enum { + OPC_ADD = 0x33, + OPC_ADDI = 0x13, + OPC_AND = 0x7033, + OPC_ANDI = 0x7013, + OPC_AUIPC = 0x17, + OPC_BEQ = 0x63, + OPC_BGE = 0x5063, + OPC_BGEU = 0x7063, + OPC_BLT = 0x4063, + OPC_BLTU = 0x6063, + OPC_BNE = 0x1063, + OPC_DIV = 0x2004033, + OPC_DIVU = 0x2005033, + OPC_JAL = 0x6f, + OPC_JALR = 0x67, + OPC_LB = 0x3, + OPC_LBU = 0x4003, + OPC_LD = 0x3003, + OPC_LH = 0x1003, + OPC_LHU = 0x5003, + OPC_LUI = 0x37, + OPC_LW = 0x2003, + OPC_LWU = 0x6003, + OPC_MUL = 0x2000033, + OPC_MULH = 0x2001033, + OPC_MULHSU = 0x2002033, + OPC_MULHU = 0x2003033, + OPC_OR = 0x6033, + OPC_ORI = 0x6013, + OPC_REM = 0x2006033, + OPC_REMU = 0x2007033, + OPC_SB = 0x23, + OPC_SD = 0x3023, + OPC_SH = 0x1023, + OPC_SLL = 0x1033, + OPC_SLLI = 0x1013, + OPC_SLT = 0x2033, + OPC_SLTI = 0x2013, + OPC_SLTIU = 0x3013, + OPC_SLTU = 0x3033, + OPC_SRA = 0x40005033, + OPC_SRAI = 0x40005013, + OPC_SRL = 0x5033, + OPC_SRLI = 0x5013, + OPC_SUB = 0x40000033, + OPC_SW = 0x2023, + OPC_XOR = 0x4033, + OPC_XORI = 0x4013, + +#if TCG_TARGET_REG_BITS == 64 + OPC_ADDIW = 0x1b, + OPC_ADDW = 0x3b, + OPC_DIVUW = 0x200503b, + OPC_DIVW = 0x200403b, + OPC_MULW = 0x200003b, + OPC_REMUW = 0x200703b, + OPC_REMW = 0x200603b, + OPC_SLLIW = 0x101b, + OPC_SLLW = 0x103b, + OPC_SRAIW = 0x4000501b, + OPC_SRAW = 0x4000503b, + OPC_SRLIW = 0x501b, + OPC_SRLW = 0x503b, + OPC_SUBW = 0x4000003b, +#else + /* Simplify code throughout by defining aliases for RV32. 
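+       On RV32 the word-sized *W opcodes do not exist, so aliasing them to the full-width forms lets the rest of the backend emit OPC_*W for 32-bit operations unconditionally.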
*/ + OPC_ADDIW = OPC_ADDI, + OPC_ADDW = OPC_ADD, + OPC_DIVUW = OPC_DIVU, + OPC_DIVW = OPC_DIV, + OPC_MULW = OPC_MUL, + OPC_REMUW = OPC_REMU, + OPC_REMW = OPC_REM, + OPC_SLLIW = OPC_SLLI, + OPC_SLLW = OPC_SLL, + OPC_SRAIW = OPC_SRAI, + OPC_SRAW = OPC_SRA, + OPC_SRLIW = OPC_SRLI, + OPC_SRLW = OPC_SRL, + OPC_SUBW = OPC_SUB, +#endif + + OPC_FENCE = 0x0000000f, +} RISCVInsn; + +/* + * RISC-V immediate and instruction encoders (excludes 16-bit RVC) + */ + +/* Type-R */ + +static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2) +{ + return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20; +} + +/* Type-I */ + +static int32_t encode_imm12(uint32_t imm) +{ + return (imm & 0xfff) << 20; +} + +static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm) +{ + return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm); +} + +/* Type-S */ + +static int32_t encode_simm12(uint32_t imm) +{ + int32_t ret = 0; + + ret |= (imm & 0xFE0) << 20; + ret |= (imm & 0x1F) << 7; + + return ret; +} + +static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm); +} + +/* Type-SB */ + +static int32_t encode_sbimm12(uint32_t imm) +{ + int32_t ret = 0; + + ret |= (imm & 0x1000) << 19; + ret |= (imm & 0x7e0) << 20; + ret |= (imm & 0x1e) << 7; + ret |= (imm & 0x800) >> 4; + + return ret; +} + +static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm); +} + +/* Type-U */ + +static int32_t encode_uimm20(uint32_t imm) +{ + return imm & 0xfffff000; +} + +static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm) +{ + return opc | (rd & 0x1f) << 7 | encode_uimm20(imm); +} + +/* Type-UJ */ + +static int32_t encode_ujimm20(uint32_t imm) +{ + int32_t ret = 0; + + ret |= (imm & 0x0007fe) << (21 - 1); + ret |= (imm & 0x000800) << (20 - 11); + ret |= (imm & 0x0ff000) << (12 - 12); + ret |= (imm & 0x100000) << (31 - 20); + + return ret; +} + +static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm) +{ + return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm); +} + +/* + * RISC-V instruction emitters + */ + +static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc, + TCGReg rd, TCGReg rs1, TCGReg rs2) +{ + tcg_out32(s, encode_r(opc, rd, rs1, rs2)); +} + +static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc, + TCGReg rd, TCGReg rs1, TCGArg imm) +{ + tcg_out32(s, encode_i(opc, rd, rs1, imm)); +} + +static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc, + TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + tcg_out32(s, encode_s(opc, rs1, rs2, imm)); +} + +static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc, + TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + tcg_out32(s, encode_sb(opc, rs1, rs2, imm)); +} + +static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc, + TCGReg rd, uint32_t imm) +{ + tcg_out32(s, encode_u(opc, rd, imm)); +} + +static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc, + TCGReg rd, uint32_t imm) +{ + tcg_out32(s, encode_uj(opc, rd, imm)); +} + +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + int i; + for (i = 0; i < count; ++i) { + p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + } +} + +/* + * Relocations + */ + +static bool reloc_sbimm12(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + + if (offset == sextreg(offset, 1, 12) << 1) { + code_ptr[0] |= 
encode_sbimm12(offset); + return true; + } + + return false; +} + +static bool reloc_jimm20(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + + if (offset == sextreg(offset, 1, 20) << 1) { + code_ptr[0] |= encode_ujimm20(offset); + return true; + } + + return false; +} + +static bool reloc_call(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + int32_t lo = sextreg(offset, 0, 12); + int32_t hi = offset - lo; + + if (offset == hi + lo) { + code_ptr[0] |= encode_uimm20(hi); + code_ptr[1] |= encode_imm12(lo); + return true; + } + + return false; +} + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + uint32_t insn = *code_ptr; + intptr_t diff; + bool short_jmp; + + tcg_debug_assert(addend == 0); + + switch (type) { + case R_RISCV_BRANCH: + diff = value - (uintptr_t)code_ptr; + short_jmp = diff == sextreg(diff, 0, 12); + if (short_jmp) { + return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value); + } else { + /* Invert the condition */ + insn = insn ^ (1 << 12); + /* Clear the offset */ + insn &= 0x01fff07f; + /* Set the offset to the PC + 8 */ + insn |= encode_sbimm12(8); + + /* Move forward */ + code_ptr[0] = insn; + + /* Overwrite the NOP with jal x0,value */ + diff = value - (uintptr_t)(code_ptr + 1); + insn = encode_uj(OPC_JAL, TCG_REG_ZERO, diff); + code_ptr[1] = insn; + + return true; + } + break; + case R_RISCV_JAL: + return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); + break; + case R_RISCV_CALL: + return reloc_call(code_ptr, (tcg_insn_unit *)value); + break; + default: + tcg_abort(); + } +} + +/* + * TCG intrinsics + */ + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret == arg) { + return; + } + switch (type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long val) +{ + tcg_target_long lo, hi, tmp; + int shift, ret; + + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + lo = sextreg(val, 0, 12); + if (val == lo) { + tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo); + return; + } + + hi = val - lo; + if (TCG_TARGET_REG_BITS == 32 || val == (int32_t)val) { + tcg_out_opc_upper(s, OPC_LUI, rd, hi); + if (lo != 0) { + tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo); + } + return; + } + + /* We can only be here if TCG_TARGET_REG_BITS != 32 */ + tmp = tcg_pcrel_diff(s, (void *)val); + if (tmp == (int32_t)tmp) { + tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); + tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0); + ret = reloc_call(s->code_ptr - 2, (tcg_insn_unit *)val); + tcg_debug_assert(ret == true); + return; + } + + /* Look for a single 20-bit section. */ + shift = ctz64(val); + tmp = val >> shift; + if (tmp == sextreg(tmp, 0, 20)) { + tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12); + if (shift > 12) { + tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12); + } else { + tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift); + } + return; + } + + /* Look for a few high zero bits, with lots of bits set in the middle. 
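+       Such a value can be materialized left-justified (with LUI or ADDI) and then shifted back down into place with a single SRLI.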
*/ + shift = clz64(val); + tmp = val << shift; + if (tmp == sextreg(tmp, 12, 20) << 12) { + tcg_out_opc_upper(s, OPC_LUI, rd, tmp); + tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift); + return; + } else if (tmp == sextreg(tmp, 0, 12)) { + tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp); + tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift); + return; + } + + /* Drop into the constant pool. */ + new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0); + tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); + tcg_out_opc_imm(s, OPC_LD, rd, rd, 0); +} + +static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff); +} + +static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); + tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16); +} + +static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32); + tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32); +} + +static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24); + tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24); +} + +static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); + tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16); +} + +static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0); +} + +static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data, + TCGReg addr, intptr_t offset) +{ + intptr_t imm12 = sextreg(offset, 0, 12); + + if (offset != imm12) { + intptr_t diff = offset - (uintptr_t)s->code_ptr; + + if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { + imm12 = sextreg(diff, 0, 12); + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr); + } + } + addr = TCG_REG_TMP2; + } + + switch (opc) { + case OPC_SB: + case OPC_SH: + case OPC_SW: + case OPC_SD: + tcg_out_opc_store(s, opc, addr, data, imm12); + break; + case OPC_LB: + case OPC_LBU: + case OPC_LH: + case OPC_LHU: + case OPC_LW: + case OPC_LWU: + case OPC_LD: + tcg_out_opc_imm(s, opc, data, addr, imm12); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32); + tcg_out_ldst(s, is32bit ? OPC_LW : OPC_LD, arg, arg1, arg2); +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32); + tcg_out_ldst(s, is32bit ? OPC_SW : OPC_SD, arg, arg1, arg2); +} + +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); + return true; + } + return false; +} + +static void tcg_out_addsub2(TCGContext *s, + TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, + TCGArg bl, TCGArg bh, + bool cbl, bool cbh, bool is_sub, bool is32bit) +{ + const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD; + const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI; + const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB; + TCGReg th = TCG_REG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. 
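+       For example, adding the constant pair (bh = -1, bl = -5) is rewritten as subtracting (bh = 0, bl = 5).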
*/ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } + + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh)); + } else { + th = ah; + } + + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl); + tcg_out_opc_imm(s, opc_addi, rl, al, -bl); + } else { + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl); + tcg_out_opc_reg(s, opc_sub, rl, al, bl); + } + tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, opc_addi, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl); + } else if (rl == al && rl == bl) { + tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0); + tcg_out_opc_reg(s, opc_addi, rl, al, bl); + } else { + tcg_out_opc_reg(s, opc_add, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, + rl, (rl == bl ? al : bl)); + } + tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0); + } +} + +static const struct { + RISCVInsn op; + bool swap; +} tcg_brcond_to_riscv[] = { + [TCG_COND_EQ] = { OPC_BEQ, false }, + [TCG_COND_NE] = { OPC_BNE, false }, + [TCG_COND_LT] = { OPC_BLT, false }, + [TCG_COND_GE] = { OPC_BGE, false }, + [TCG_COND_LE] = { OPC_BGE, true }, + [TCG_COND_GT] = { OPC_BLT, true }, + [TCG_COND_LTU] = { OPC_BLTU, false }, + [TCG_COND_GEU] = { OPC_BGEU, false }, + [TCG_COND_LEU] = { OPC_BGEU, true }, + [TCG_COND_GTU] = { OPC_BLTU, true } +}; + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + RISCVInsn op = tcg_brcond_to_riscv[cond].op; + + tcg_debug_assert(op != 0); + + if (tcg_brcond_to_riscv[cond].swap) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + + if (l->has_value) { + intptr_t diff = tcg_pcrel_diff(s, l->u.value_ptr); + if (diff == sextreg(diff, 0, 12)) { + tcg_out_opc_branch(s, op, arg1, arg2, diff); + } else { + /* Invert the conditional branch. 
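+              Flipping bit 12 (the low bit of funct3) turns BEQ/BNE, BLT/BGE and BLTU/BGEU into their opposites; the inverted branch skips over the unconditional JAL emitted next, which reaches the distant label.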
*/ + tcg_out_opc_branch(s, op ^ (1 << 12), arg1, arg2, 8); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, diff - 4); + } + } else { + tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0); + tcg_out_opc_branch(s, op, arg1, arg2, 0); + /* NOP to allow patching later */ + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + } +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + switch (cond) { + case TCG_COND_EQ: + tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); + break; + case TCG_COND_NE: + tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); + break; + case TCG_COND_LT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + break; + case TCG_COND_GE: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_LE: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_GT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + break; + case TCG_COND_LTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + break; + case TCG_COND_GEU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_LEU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_GTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + break; + default: + g_assert_not_reached(); + break; + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, TCGLabel *l) +{ + /* todo */ + g_assert_not_reached(); +} + +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ + /* todo */ + g_assert_not_reached(); +} + +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextreg(offset, 1, 20) << 1); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); +} + +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + TCGReg link = tail ? 
TCG_REG_ZERO : TCG_REG_RA; + ptrdiff_t offset = tcg_pcrel_diff(s, arg); + int ret; + + if (offset == sextreg(offset, 1, 20) << 1) { + /* short jump: -2097150 to 2097152 */ + tcg_out_opc_jump(s, OPC_JAL, link, offset); + } else if (TCG_TARGET_REG_BITS == 32 || + offset == sextreg(offset, 1, 31) << 1) { + /* long jump: -2147483646 to 2147483648 */ + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0); + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0); + ret = reloc_call(s->code_ptr - 2, arg);\ + tcg_debug_assert(ret == true); + } else if (TCG_TARGET_REG_BITS == 64) { + /* far jump: 64-bit */ + tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12); + tcg_target_long base = (tcg_target_long)arg - imm; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base); + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm); + } else { + g_assert_not_reached(); + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); +} + +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + tcg_insn_unit insn = OPC_FENCE; + + if (a0 & TCG_MO_LD_LD) { + insn |= 0x02200000; + } + if (a0 & TCG_MO_ST_LD) { + insn |= 0x01200000; + } + if (a0 & TCG_MO_LD_ST) { + insn |= 0x02100000; + } + if (a0 & TCG_MO_ST_ST) { + insn |= 0x02200000; + } + tcg_out32(s, insn); +} + +/* + * Load/store and TLB + */ + +#if defined(CONFIG_SOFTMMU) +#include "tcg-ldst.inc.c" + +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * TCGMemOpIdx oi, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, +#if TCG_TARGET_REG_BITS == 64 + [MO_LESL] = helper_le_ldsl_mmu, +#endif + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, +#if TCG_TARGET_REG_BITS == 64 + [MO_BESL] = helper_be_ldsl_mmu, +#endif + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, TCGMemOpIdx oi, + * uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, + TCGReg addrh, TCGMemOpIdx oi, + tcg_insn_unit **label_ptr, bool is_load) +{ + TCGMemOp opc = get_memop(oi); + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); + target_ulong mask; + int mem_index = get_mmuidx(oi); + int cmp_off + = (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + RISCVInsn load_cmp_op = (TARGET_LONG_BITS == 64 ? OPC_LD : + TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW); + RISCVInsn load_add_op = TCG_TARGET_REG_BITS == 64 ? OPC_LD : OPC_LW; + TCGReg base = TCG_AREG0; + + /* We don't support oversize guests */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + g_assert_not_reached(); + } + + /* We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + + + /* Compensate for very large offsets. 
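+       Load instructions only take a 12-bit signed immediate, so when the tlb_table offsets within CPUArchState do not fit, the bulk of the offset is first added to TCG_AREG0 in a temporary base register.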
*/ + if (add_off >= 0x1000) { + int adj; + base = TCG_REG_TMP2; + if (cmp_off <= 2 * 0xfff) { + adj = 0xfff; + tcg_out_opc_imm(s, OPC_ADDI, base, TCG_AREG0, adj); + } else { + adj = cmp_off - sextreg(cmp_off, 0, 12); + tcg_debug_assert(add_off - adj >= -0x1000 + && add_off - adj < 0x1000); + + tcg_out_opc_upper(s, OPC_LUI, base, adj); + tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_AREG0); + } + add_off -= adj; + cmp_off -= adj; + } + + /* Extract the page index. */ + if (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS < 12) { + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0, + MAKE_64BIT_MASK(CPU_TLB_ENTRY_BITS, CPU_TLB_BITS)); + } else if (TARGET_PAGE_BITS >= 12) { + tcg_out_opc_upper(s, OPC_LUI, TCG_REG_TMP0, + MAKE_64BIT_MASK(TARGET_PAGE_BITS, CPU_TLB_BITS)); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP0, TCG_REG_TMP0, addrl); + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, TCG_REG_TMP0, + CPU_TLB_BITS - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl, TARGET_PAGE_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0, + MAKE_64BIT_MASK(0, CPU_TLB_BITS)); + tcg_out_opc_imm(s, OPC_SLLI, TCG_REG_TMP0, TCG_REG_TMP0, + CPU_TLB_ENTRY_BITS); + } + + /* Add that to the base address to index the tlb. */ + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, base, TCG_REG_TMP0); + base = TCG_REG_TMP2; + + /* Load the tlb comparator and the addend. */ + tcg_out_ldst(s, load_cmp_op, TCG_REG_TMP0, base, cmp_off); + tcg_out_ldst(s, load_add_op, TCG_REG_TMP2, base, add_off); + + /* Clear the non-page, non-alignment bits from the address. */ + if (mask == sextreg(mask, 0, 12)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addrl, mask); + } else { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, mask); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addrl); + } + + /* Compare masked address with the TLB entry. */ + label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); + /* NOP to allow patching later */ + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + /* TODO: Move this out of line + * see: + * https://lists.nongnu.org/archive/html/qemu-devel/2018-11/msg02234.html + */ + + /* TLB Hit - translate address using addend. 
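+       For a 32-bit guest on a 64-bit host the guest address is first zero-extended; the resulting host address (addend + guest address) is left in TCG_REG_TMP0.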
*/ + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP0, addrl); + addrl = TCG_REG_TMP0; + } + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, + TCGType ext, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + void *raddr, tcg_insn_unit **label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->type = ext; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGReg a0 = tcg_target_call_iarg_regs[0]; + TCGReg a1 = tcg_target_call_iarg_regs[1]; + TCGReg a2 = tcg_target_call_iarg_regs[2]; + TCGReg a3 = tcg_target_call_iarg_regs[3]; + + /* We don't support oversize guests */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + g_assert_not_reached(); + } + + /* resolve label address */ + patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + + /* call load helper */ + tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); + tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); + + tcg_out_goto(s, l->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + TCGReg a0 = tcg_target_call_iarg_regs[0]; + TCGReg a1 = tcg_target_call_iarg_regs[1]; + TCGReg a2 = tcg_target_call_iarg_regs[2]; + TCGReg a3 = tcg_target_call_iarg_regs[3]; + TCGReg a4 = tcg_target_call_iarg_regs[4]; + + /* We don't support oversize guests */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + g_assert_not_reached(); + } + + /* resolve label address */ + patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + + /* call store helper */ + tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, a2, l->datalo_reg); + switch (s_bits) { + case MO_8: + tcg_out_ext8u(s, a2, a2); + break; + case MO_16: + tcg_out_ext16u(s, a2, a2); + break; + default: + break; + } + tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); + tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + + tcg_out_goto(s, l->raddr); +} +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, TCGMemOp opc, bool is_64) +{ + const TCGMemOp bswap = opc & MO_BSWAP; + + /* We don't yet handle byteswapping, assert */ + g_assert(!bswap); + + switch (opc & (MO_SSIZE)) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); + break; + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, lo, base, 0); + break; + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); + break; + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, lo, base, 0); + break; + case MO_UL: + if (TCG_TARGET_REG_BITS == 64 && is_64) { + tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); + break; + } + /* FALLTHRU */ + case MO_SL: + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + break; + case MO_Q: + /* Prefer 
to load from offset 0 first, but allow for overlap. */ + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, OPC_LD, lo, base, 0); + } else if (lo != base) { + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + tcg_out_opc_imm(s, OPC_LW, hi, base, 4); + } else { + tcg_out_opc_imm(s, OPC_LW, hi, base, 4); + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + } + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[1]; +#endif + TCGReg base = TCG_REG_TMP0; + + data_regl = *args++; + data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + add_qemu_ldst_label(s, 1, oi, + (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), + data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, base, addr_regl); + addr_regl = base; + } + + if (guest_base == 0) { + tcg_out_opc_reg(s, OPC_ADD, base, addr_regl, TCG_REG_ZERO); + } else { + tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, TCGMemOp opc) +{ + const TCGMemOp bswap = opc & MO_BSWAP; + + /* We don't yet handle byteswapping, assert */ + g_assert(!bswap); + + switch (opc & (MO_SSIZE)) { + case MO_8: + tcg_out_opc_store(s, OPC_SB, base, lo, 0); + break; + case MO_16: + tcg_out_opc_store(s, OPC_SH, base, lo, 0); + break; + case MO_32: + tcg_out_opc_store(s, OPC_SW, base, lo, 0); + break; + case MO_64: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_store(s, OPC_SD, base, lo, 0); + } else { + tcg_out_opc_store(s, OPC_SW, base, lo, 0); + tcg_out_opc_store(s, OPC_SW, base, hi, 4); + } + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[1]; +#endif + TCGReg base = TCG_REG_TMP0; + + data_regl = *args++; + data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, oi, + (is_64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32), + data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, base, addr_regl); + addr_regl = base; + } + + if (guest_base == 0) { + tcg_out_opc_reg(s, OPC_ADD, base, addr_regl, TCG_REG_ZERO); + } else { + tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static tcg_insn_unit *tb_ret_addr; + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + TCGArg a0 = args[0]; + TCGArg a1 = args[1]; + TCGArg a2 = args[2]; + int c2 = const_args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, s->code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, tb_ret_addr, true); + } + break; + + case INDEX_op_goto_tb: + assert(s->tb_jmp_insn_offset == 0); + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_jmp_target_addr + a0)); + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); + set_jmp_reset_offset(s, a0); + break; + + case INDEX_op_goto_ptr: + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); + break; + + case INDEX_op_br: + tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ldst(s, OPC_LBU, a0, a1, a2); + break; + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_ldst(s, OPC_LB, a0, a1, a2); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ldst(s, OPC_LHU, a0, a1, a2); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_ldst(s, OPC_LH, a0, a1, a2); + break; + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, OPC_LWU, a0, a1, a2); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, OPC_LW, a0, a1, a2); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, OPC_LD, a0, a1, a2); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_ldst(s, OPC_SB, a0, a1, a2); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_ldst(s, OPC_SH, a0, a1, a2); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, OPC_SW, a0, a1, a2); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, OPC_SD, a0, a1, a2); + break; + + case INDEX_op_add_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2); + } + break; + case INDEX_op_add_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2); + } + break; + + case INDEX_op_sub_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2); + } else { + tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2); + } + break; + case INDEX_op_sub_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2); + } else { + tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2); + } + break; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); + } + break; + + case INDEX_op_or_i32: + case INDEX_op_or_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); + } + break; + + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + if (c2) { + 
tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); + } + break; + + case INDEX_op_not_i32: + case INDEX_op_not_i64: + tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); + break; + + case INDEX_op_neg_i32: + tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1); + break; + case INDEX_op_neg_i64: + tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1); + break; + + case INDEX_op_mul_i32: + tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2); + break; + case INDEX_op_mul_i64: + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + break; + + case INDEX_op_div_i32: + tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2); + break; + case INDEX_op_div_i64: + tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2); + break; + + case INDEX_op_divu_i32: + tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2); + break; + case INDEX_op_divu_i64: + tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2); + break; + + case INDEX_op_rem_i32: + tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2); + break; + case INDEX_op_rem_i64: + tcg_out_opc_reg(s, OPC_REM, a0, a1, a2); + break; + + case INDEX_op_remu_i32: + tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2); + break; + case INDEX_op_remu_i64: + tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2); + break; + + case INDEX_op_shl_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2); + } + break; + case INDEX_op_shl_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2); + } + break; + + case INDEX_op_shr_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2); + } + break; + case INDEX_op_shr_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2); + } + break; + + case INDEX_op_sar_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2); + } + break; + case INDEX_op_sar_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2); + } + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false, true); + break; + case INDEX_op_add2_i64: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false, false); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true, true); + break; + case INDEX_op_sub2_i64: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true, false); + break; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); + break; + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); + break; + + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + tcg_out_setcond(s, args[3], a0, a1, a2); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); + break; + + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + tcg_out_ext8u(s, a0, a1); + break; + + case INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + 
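The _i32 cases above deliberately select the W-form opcodes (ADDW, SUBW, MULW, DIVW, SLLW, ...). A behavioural sketch, not QEMU code, of what those instructions do on RV64:

    #include <stdint.h>

    /* RV64 W-form arithmetic operates on the low 32 bits of its operands
     * and sign-extends the 32-bit result into the 64-bit destination. */
    static int64_t rv64_addw(int64_t rs1, int64_t rs2)
    {
        uint32_t sum = (uint32_t)rs1 + (uint32_t)rs2;   /* low 32 bits only */
        return (int32_t)sum;                            /* sign-extend      */
    }

For example, rv64_addw(0x7fffffff, 1) yields -2147483648, i.e. the 64-bit register ends up holding 0xffffffff80000000.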
tcg_out_ext16u(s, a0, a1); + break; + + case INDEX_op_ext32u_i64: + case INDEX_op_extu_i32_i64: + tcg_out_ext32u(s, a0, a1); + break; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + tcg_out_ext8s(s, a0, a1); + break; + + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + tcg_out_ext16s(s, a0, a1); + break; + + case INDEX_op_ext32s_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_ext_i32_i64: + tcg_out_ext32s(s, a0, a1); + break; + + case INDEX_op_extrh_i64_i32: + tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32); + break; + + case INDEX_op_mulsh_i32: + case INDEX_op_mulsh_i64: + tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2); + break; + + case INDEX_op_muluh_i32: + case INDEX_op_muluh_i64: + tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2); + break; + + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + g_assert_not_reached(); + } +} + +static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) +{ + static const TCGTargetOpDef r + = { .args_ct_str = { "r" } }; + static const TCGTargetOpDef r_r + = { .args_ct_str = { "r", "r" } }; + static const TCGTargetOpDef rZ_r + = { .args_ct_str = { "rZ", "r" } }; + static const TCGTargetOpDef rZ_rZ + = { .args_ct_str = { "rZ", "rZ" } }; + static const TCGTargetOpDef rZ_rZ_rZ_rZ + = { .args_ct_str = { "rZ", "rZ", "rZ", "rZ" } }; + static const TCGTargetOpDef r_r_ri + = { .args_ct_str = { "r", "r", "ri" } }; + static const TCGTargetOpDef r_r_rI + = { .args_ct_str = { "r", "r", "rI" } }; + static const TCGTargetOpDef r_rZ_rN + = { .args_ct_str = { "r", "rZ", "rN" } }; + static const TCGTargetOpDef r_rZ_rZ + = { .args_ct_str = { "r", "rZ", "rZ" } }; + static const TCGTargetOpDef r_rZ_rZ_rZ_rZ + = { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } }; + static const TCGTargetOpDef r_L + = { .args_ct_str = { "r", "L" } }; + static const TCGTargetOpDef r_r_L + = { .args_ct_str = { "r", "r", "L" } }; + static const TCGTargetOpDef r_L_L + = { .args_ct_str = { "r", "L", "L" } }; + static const TCGTargetOpDef r_r_L_L + = { .args_ct_str = { "r", "r", "L", "L" } }; + static const TCGTargetOpDef LZ_L + = { .args_ct_str = { "LZ", "L" } }; + static const TCGTargetOpDef LZ_L_L + = { .args_ct_str = { "LZ", "L", "L" } }; + static const TCGTargetOpDef LZ_LZ_L + = { .args_ct_str = { "LZ", "LZ", "L" } }; + static const TCGTargetOpDef LZ_LZ_L_L + = { .args_ct_str = { "LZ", "LZ", "L", "L" } }; + static const TCGTargetOpDef r_r_rZ_rZ_rM_rM + = { .args_ct_str = { "r", "r", "rZ", "rZ", "rM", "rM" } }; + + switch (op) { + case INDEX_op_goto_ptr: + return &r; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld_i32: + case INDEX_op_not_i32: + case INDEX_op_neg_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld_i64: + case INDEX_op_not_i64: + case INDEX_op_neg_i64: + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + case INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + case INDEX_op_ext32u_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + case INDEX_op_ext32s_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + case 
INDEX_op_ext_i32_i64: + return &r_r; + + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st_i32: + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + return &rZ_r; + + case INDEX_op_add_i32: + case INDEX_op_and_i32: + case INDEX_op_or_i32: + case INDEX_op_xor_i32: + case INDEX_op_add_i64: + case INDEX_op_and_i64: + case INDEX_op_or_i64: + case INDEX_op_xor_i64: + return &r_r_rI; + + case INDEX_op_sub_i32: + case INDEX_op_sub_i64: + return &r_rZ_rN; + + case INDEX_op_mul_i32: + case INDEX_op_mulsh_i32: + case INDEX_op_muluh_i32: + case INDEX_op_div_i32: + case INDEX_op_divu_i32: + case INDEX_op_rem_i32: + case INDEX_op_remu_i32: + case INDEX_op_setcond_i32: + case INDEX_op_mul_i64: + case INDEX_op_mulsh_i64: + case INDEX_op_muluh_i64: + case INDEX_op_div_i64: + case INDEX_op_divu_i64: + case INDEX_op_rem_i64: + case INDEX_op_remu_i64: + case INDEX_op_setcond_i64: + return &r_rZ_rZ; + + case INDEX_op_shl_i32: + case INDEX_op_shr_i32: + case INDEX_op_sar_i32: + case INDEX_op_shl_i64: + case INDEX_op_shr_i64: + case INDEX_op_sar_i64: + return &r_r_ri; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + return &rZ_rZ; + + case INDEX_op_add2_i32: + case INDEX_op_add2_i64: + case INDEX_op_sub2_i32: + case INDEX_op_sub2_i64: + return &r_r_rZ_rZ_rM_rM; + + case INDEX_op_brcond2_i32: + return &rZ_rZ_rZ_rZ; + + case INDEX_op_setcond2_i32: + return &r_rZ_rZ_rZ_rZ; + + case INDEX_op_qemu_ld_i32: + return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L; + case INDEX_op_qemu_st_i32: + return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &LZ_L : &LZ_L_L; + case INDEX_op_qemu_ld_i64: + return TCG_TARGET_REG_BITS == 64 ? &r_L + : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L + : &r_r_L_L; + case INDEX_op_qemu_st_i64: + return TCG_TARGET_REG_BITS == 64 ? &LZ_L + : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &LZ_LZ_L + : &LZ_LZ_L_L; + + default: + return NULL; + } +} + +static const int tcg_target_callee_save_regs[] = { + TCG_REG_S0, /* used for the global env (TCG_AREG0) */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_S10, + TCG_REG_S11, + TCG_REG_RA, /* should be last for ABI compliance */ +}; + +/* Stack frame parameters. */ +#define REG_SIZE (TCG_TARGET_REG_BITS / 8) +#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE) +#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) +#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE) + +/* We're expecting to be able to use an immediate for frame allocation. 
*/ +QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff); + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i; + + tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE); + + /* TB prologue */ + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_SP, SAVE_OFS + i * REG_SIZE); + } + +#if !defined(CONFIG_SOFTMMU) + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); +#endif + + /* Call generated code */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0); + + /* Return path for goto_ptr. Set return value to 0 */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO); + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_SP, SAVE_OFS + i * REG_SIZE); + } + + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE); + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0); +} + +static void tcg_target_init(TCGContext *s) +{ + tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; + if (TCG_TARGET_REG_BITS == 64) { + tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; + } + + tcg_target_call_clobber_regs = -1u; + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10); + tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11); + + s->reserved_regs = 0; + tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_RISCV + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */ + .h.cie.return_column = TCG_REG_RA, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... 
uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x80 + 9, 12, /* DW_CFA_offset, s1, -96 */ + 0x80 + 18, 11, /* DW_CFA_offset, s2, -88 */ + 0x80 + 19, 10, /* DW_CFA_offset, s3, -80 */ + 0x80 + 20, 9, /* DW_CFA_offset, s4, -72 */ + 0x80 + 21, 8, /* DW_CFA_offset, s5, -64 */ + 0x80 + 22, 7, /* DW_CFA_offset, s6, -56 */ + 0x80 + 23, 6, /* DW_CFA_offset, s7, -48 */ + 0x80 + 24, 5, /* DW_CFA_offset, s8, -40 */ + 0x80 + 25, 4, /* DW_CFA_offset, s9, -32 */ + 0x80 + 26, 3, /* DW_CFA_offset, s10, -24 */ + 0x80 + 27, 2, /* DW_CFA_offset, s11, -16 */ + 0x80 + 1 , 1, /* DW_CFA_offset, ra, -8 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 38652db32c..1bd7ef24af 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -240,6 +240,7 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l) if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { + l->refs++; tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); } } @@ -1405,6 +1406,7 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l) if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { + l->refs++; if (TCG_TARGET_REG_BITS == 32) { tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index db4e9188f4..7007ec0d4d 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -260,6 +260,7 @@ static inline void gen_set_label(TCGLabel *l) static inline void tcg_gen_br(TCGLabel *l) { + l->refs++; tcg_gen_op1(INDEX_op_br, label_arg(l)); } diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index e3a43aabb6..7a8a3edb5b 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -191,9 +191,10 @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64)) /* QEMU specific */ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) -DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) -DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) -DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) +DEF(goto_ptr, 0, 1, 0, + TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) diff --git a/tcg/tcg.c b/tcg/tcg.c index 963cb37892..c54b119020 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1887,7 +1887,21 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", }; -void tcg_dump_ops(TCGContext *s) +static inline bool tcg_regset_single(TCGRegSet d) +{ + return (d & (d - 1)) == 0; +} + +static inline TCGReg tcg_regset_first(TCGRegSet d) +{ + if (TCG_TARGET_NB_REGS <= 32) { + return ctz32(d); + } else { + return ctz64(d); + } +} + +static void tcg_dump_ops(TCGContext *s, bool have_prefs) { char buf[128]; TCGOp *op; @@ -1902,6 +1916,7 @@ void tcg_dump_ops(TCGContext *s) def = &tcg_op_defs[c]; if (c == INDEX_op_insn_start) { + nb_oargs = 0; col += qemu_log("\n ----"); for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { @@ -2021,12 +2036,15 @@ void tcg_dump_ops(TCGContext *s) col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
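The two bytes emitted for FRAME_SIZE in the .fde_def_cfa initializer earlier are its ULEB128 encoding (7 bits per byte, continuation bit 0x80 on every byte but the last). A generic encoder for reference; this is an assumed helper, not a QEMU function, and for FRAME_SIZE values in the range 0x80..0x3fff it produces exactly that two-byte sequence:

    /* Encode an unsigned value as ULEB128 into 'out'; returns byte count. */
    static unsigned uleb128_encode(unsigned long value, unsigned char *out)
    {
        unsigned n = 0;
        do {
            unsigned char byte = value & 0x7f;
            value >>= 7;
            if (value != 0) {
                byte |= 0x80;   /* more bytes follow */
            }
            out[n++] = byte;
        } while (value != 0);
        return n;
    }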
"," : "", op->args[k]); } } - if (op->life) { - unsigned life = op->life; - for (; col < 48; ++col) { + if (have_prefs || op->life) { + for (; col < 40; ++col) { putc(' ', qemu_logfile); } + } + + if (op->life) { + unsigned life = op->life; if (life & (SYNC_ARG * 3)) { qemu_log(" sync:"); @@ -2046,6 +2064,33 @@ void tcg_dump_ops(TCGContext *s) } } } + + if (have_prefs) { + for (i = 0; i < nb_oargs; ++i) { + TCGRegSet set = op->output_pref[i]; + + if (i == 0) { + qemu_log(" pref="); + } else { + qemu_log(","); + } + if (set == 0) { + qemu_log("none"); + } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { + qemu_log("all"); +#ifdef CONFIG_DEBUG_TCG + } else if (tcg_regset_single(set)) { + TCGReg reg = tcg_regset_first(set); + qemu_log("%s", tcg_target_reg_names[reg]); +#endif + } else if (TCG_TARGET_NB_REGS <= 32) { + qemu_log("%#x", (uint32_t)set); + } else { + qemu_log("%#" PRIx64, (uint64_t)set); + } + } + } + qemu_log("\n"); } } @@ -2171,6 +2216,26 @@ static void process_op_defs(TCGContext *s) void tcg_op_remove(TCGContext *s, TCGOp *op) { + TCGLabel *label; + + switch (op->opc) { + case INDEX_op_br: + label = arg_label(op->args[0]); + label->refs--; + break; + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + label = arg_label(op->args[3]); + label->refs--; + break; + case INDEX_op_brcond2_i32: + label = arg_label(op->args[5]); + label->refs--; + break; + default: + break; + } + QTAILQ_REMOVE(&s->ops, op, link); QTAILQ_INSERT_TAIL(&s->free_ops, op, link); s->nb_ops--; @@ -2219,43 +2284,181 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) return new_op; } +/* Reachable analysis : remove unreachable code. */ +static void reachable_code_pass(TCGContext *s) +{ + TCGOp *op, *op_next; + bool dead = false; + + QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { + bool remove = dead; + TCGLabel *label; + int call_flags; + + switch (op->opc) { + case INDEX_op_set_label: + label = arg_label(op->args[0]); + if (label->refs == 0) { + /* + * While there is an occasional backward branch, virtually + * all branches generated by the translators are forward. + * Which means that generally we will have already removed + * all references to the label that will be, and there is + * little to be gained by iterating. + */ + remove = true; + } else { + /* Once we see a label, insns become live again. */ + dead = false; + remove = false; + + /* + * Optimization can fold conditional branches to unconditional. + * If we find a label with one reference which is preceded by + * an unconditional branch to it, remove both. This needed to + * wait until the dead code in between them was removed. + */ + if (label->refs == 1) { + TCGOp *op_prev = QTAILQ_PREV(op, TCGOpHead, link); + if (op_prev->opc == INDEX_op_br && + label == arg_label(op_prev->args[0])) { + tcg_op_remove(s, op_prev); + remove = true; + } + } + } + break; + + case INDEX_op_br: + case INDEX_op_exit_tb: + case INDEX_op_goto_ptr: + /* Unconditional branches; everything following is dead. */ + dead = true; + break; + + case INDEX_op_call: + /* Notice noreturn helper calls, raising exceptions. */ + call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; + if (call_flags & TCG_CALL_NO_RETURN) { + dead = true; + } + break; + + case INDEX_op_insn_start: + /* Never remove -- we need to keep these for unwind. 
*/ + remove = false; + break; + + default: + break; + } + + if (remove) { + tcg_op_remove(s, op); + } + } +} + #define TS_DEAD 1 #define TS_MEM 2 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) +/* For liveness_pass_1, the register preferences for a given temp. */ +static inline TCGRegSet *la_temp_pref(TCGTemp *ts) +{ + return ts->state_ptr; +} + +/* For liveness_pass_1, reset the preferences for a given temp to the + * maximal regset for its type. + */ +static inline void la_reset_pref(TCGTemp *ts) +{ + *la_temp_pref(ts) + = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); +} + /* liveness analysis: end of function: all temps are dead, and globals should be in memory. */ -static void tcg_la_func_end(TCGContext *s) +static void la_func_end(TCGContext *s, int ng, int nt) { - int ng = s->nb_globals; - int nt = s->nb_temps; int i; for (i = 0; i < ng; ++i) { s->temps[i].state = TS_DEAD | TS_MEM; + la_reset_pref(&s->temps[i]); } for (i = ng; i < nt; ++i) { s->temps[i].state = TS_DEAD; + la_reset_pref(&s->temps[i]); } } /* liveness analysis: end of basic block: all temps are dead, globals and local temps should be in memory. */ -static void tcg_la_bb_end(TCGContext *s) +static void la_bb_end(TCGContext *s, int ng, int nt) { - int ng = s->nb_globals; - int nt = s->nb_temps; int i; for (i = 0; i < ng; ++i) { s->temps[i].state = TS_DEAD | TS_MEM; + la_reset_pref(&s->temps[i]); } for (i = ng; i < nt; ++i) { s->temps[i].state = (s->temps[i].temp_local ? TS_DEAD | TS_MEM : TS_DEAD); + la_reset_pref(&s->temps[i]); + } +} + +/* liveness analysis: sync globals back to memory. */ +static void la_global_sync(TCGContext *s, int ng) +{ + int i; + + for (i = 0; i < ng; ++i) { + int state = s->temps[i].state; + s->temps[i].state = state | TS_MEM; + if (state == TS_DEAD) { + /* If the global was previously dead, reset prefs. */ + la_reset_pref(&s->temps[i]); + } + } +} + +/* liveness analysis: sync globals back to memory and kill. */ +static void la_global_kill(TCGContext *s, int ng) +{ + int i; + + for (i = 0; i < ng; i++) { + s->temps[i].state = TS_DEAD | TS_MEM; + la_reset_pref(&s->temps[i]); + } +} + +/* liveness analysis: note live globals crossing calls. */ +static void la_cross_call(TCGContext *s, int nt) +{ + TCGRegSet mask = ~tcg_target_call_clobber_regs; + int i; + + for (i = 0; i < nt; i++) { + TCGTemp *ts = &s->temps[i]; + if (!(ts->state & TS_DEAD)) { + TCGRegSet *pset = la_temp_pref(ts); + TCGRegSet set = *pset; + + set &= mask; + /* If the combination is not possible, restart. */ + if (set == 0) { + set = tcg_target_available_regs[ts->type] & mask; + } + *pset = set; + } } } @@ -2265,16 +2468,25 @@ static void tcg_la_bb_end(TCGContext *s) static void liveness_pass_1(TCGContext *s) { int nb_globals = s->nb_globals; + int nb_temps = s->nb_temps; TCGOp *op, *op_prev; + TCGRegSet *prefs; + int i; + + prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); + for (i = 0; i < nb_temps; ++i) { + s->temps[i].state_ptr = prefs + i; + } - tcg_la_func_end(s); + /* ??? Should be redundant with the exit_tb that ends the TB. 
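An illustrative op fragment for the two removals performed by reachable_code_pass above (hypothetical temporaries and label, not taken from a real trace):

    br $L1
    mov_i32 tmp2, tmp3        <- unreachable once the br is seen
    set_label $L1             <- refs == 1, preceded by br $L1

After the optimizer folds a conditional branch into the unconditional br, the forward sweep first drops the dead mov (everything after an unconditional branch is dead), and only then does the set_label case see the single-reference label directly preceded by its own br and remove both ops, which is why the pair removal has to wait for the dead code in between.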
*/ + la_func_end(s, nb_globals, nb_temps); QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) { - int i, nb_iargs, nb_oargs; + int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; bool have_opc_new2; TCGLifeData arg_life = 0; - TCGTemp *arg_ts; + TCGTemp *ts; TCGOpcode opc = op->opc; const TCGOpDef *def = &tcg_op_defs[opc]; @@ -2282,6 +2494,7 @@ static void liveness_pass_1(TCGContext *s) case INDEX_op_call: { int call_flags; + int nb_call_regs; nb_oargs = TCGOP_CALLO(op); nb_iargs = TCGOP_CALLI(op); @@ -2290,53 +2503,74 @@ static void liveness_pass_1(TCGContext *s) /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state != TS_DEAD) { + ts = arg_temp(op->args[i]); + if (ts->state != TS_DEAD) { goto do_not_remove_call; } } goto do_remove; - } else { - do_not_remove_call: + } + do_not_remove_call: - /* output args are dead */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } - if (arg_ts->state & TS_MEM) { - arg_life |= SYNC_ARG << i; - } - arg_ts->state = TS_DEAD; + /* Output args are dead. */ + for (i = 0; i < nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; } + if (ts->state & TS_MEM) { + arg_life |= SYNC_ARG << i; + } + ts->state = TS_DEAD; + la_reset_pref(ts); - if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | - TCG_CALL_NO_READ_GLOBALS))) { - /* globals should go back to memory */ - for (i = 0; i < nb_globals; i++) { - s->temps[i].state = TS_DEAD | TS_MEM; - } - } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { - /* globals should be synced to memory */ - for (i = 0; i < nb_globals; i++) { - s->temps[i].state |= TS_MEM; - } + /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ + op->output_pref[i] = 0; + } + + if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | + TCG_CALL_NO_READ_GLOBALS))) { + la_global_kill(s, nb_globals); + } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { + la_global_sync(s, nb_globals); + } + + /* Record arguments that die in this helper. */ + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts && ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; } + } - /* record arguments that die in this helper */ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts && arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } + /* For all live registers, remove call-clobbered prefs. */ + la_cross_call(s, nb_temps); + + nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); + + /* Input arguments are live for preceding opcodes. */ + for (i = 0; i < nb_iargs; i++) { + ts = arg_temp(op->args[i + nb_oargs]); + if (ts && ts->state & TS_DEAD) { + /* For those arguments that die, and will be allocated + * in registers, clear the register set for that arg, + * to be filled in below. For args that will be on + * the stack, reset to any available reg. + */ + *la_temp_pref(ts) + = (i < nb_call_regs ? 0 : + tcg_target_available_regs[ts->type]); + ts->state &= ~TS_DEAD; } - /* input arguments are live for preceding opcodes */ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts) { - arg_ts->state &= ~TS_DEAD; - } + } + + /* For each input argument, add its input register to prefs. + If a temp is used once, this produces a single set bit. 
*/ + for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { + ts = arg_temp(op->args[i + nb_oargs]); + if (ts) { + tcg_regset_set_reg(*la_temp_pref(ts), + tcg_target_call_iarg_regs[i]); } } } @@ -2345,7 +2579,9 @@ static void liveness_pass_1(TCGContext *s) break; case INDEX_op_discard: /* mark the temporary as dead */ - arg_temp(op->args[0])->state = TS_DEAD; + ts = arg_temp(op->args[0]); + ts->state = TS_DEAD; + la_reset_pref(ts); break; case INDEX_op_add2_i32: @@ -2440,43 +2676,96 @@ static void liveness_pass_1(TCGContext *s) goto do_not_remove; } } - do_remove: - tcg_op_remove(s, op); - } else { - do_not_remove: - /* output args are dead */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } - if (arg_ts->state & TS_MEM) { - arg_life |= SYNC_ARG << i; - } - arg_ts->state = TS_DEAD; + goto do_remove; + } + goto do_not_remove; + + do_remove: + tcg_op_remove(s, op); + break; + + do_not_remove: + for (i = 0; i < nb_oargs; i++) { + ts = arg_temp(op->args[i]); + + /* Remember the preference of the uses that followed. */ + op->output_pref[i] = *la_temp_pref(ts); + + /* Output args are dead. */ + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; + } + if (ts->state & TS_MEM) { + arg_life |= SYNC_ARG << i; } + ts->state = TS_DEAD; + la_reset_pref(ts); + } - /* if end of basic block, update */ - if (def->flags & TCG_OPF_BB_END) { - tcg_la_bb_end(s); - } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { - /* globals should be synced to memory */ - for (i = 0; i < nb_globals; i++) { - s->temps[i].state |= TS_MEM; - } + /* If end of basic block, update. */ + if (def->flags & TCG_OPF_BB_EXIT) { + la_func_end(s, nb_globals, nb_temps); + } else if (def->flags & TCG_OPF_BB_END) { + la_bb_end(s, nb_globals, nb_temps); + } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + la_global_sync(s, nb_globals); + if (def->flags & TCG_OPF_CALL_CLOBBER) { + la_cross_call(s, nb_temps); } + } - /* record arguments that die in this opcode */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } + /* Record arguments that die in this opcode. */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; + } + } + + /* Input arguments are live for preceding opcodes. */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + /* For operands that were dead, initially allow + all regs for the type. */ + *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; + ts->state &= ~TS_DEAD; + } + } + + /* Incorporate constraints for this operand. */ + switch (opc) { + case INDEX_op_mov_i32: + case INDEX_op_mov_i64: + /* Note that these are TCG_OPF_NOT_PRESENT and do not + have proper constraints. That said, special case + moves to propagate preferences backward. 
*/ + if (IS_DEAD_ARG(1)) { + *la_temp_pref(arg_temp(op->args[0])) + = *la_temp_pref(arg_temp(op->args[1])); } - /* input arguments are live for preceding opcodes */ + break; + + default: for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - arg_temp(op->args[i])->state &= ~TS_DEAD; + const TCGArgConstraint *ct = &def->args_ct[i]; + TCGRegSet set, *pset; + + ts = arg_temp(op->args[i]); + pset = la_temp_pref(ts); + set = *pset; + + set &= ct->u.regs; + if (ct->ct & TCG_CT_IALIAS) { + set &= op->output_pref[ct->alias_index]; + } + /* If the combination is not possible, restart. */ + if (set == 0) { + set = ct->u.regs; + } + *pset = set; } + break; } break; } @@ -2727,7 +3016,7 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) s->current_frame_offset += sizeof(tcg_target_long); } -static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); +static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); /* Mark a temporary as free or dead. If 'free_or_dead' is negative, mark it free; otherwise mark it dead. */ @@ -2755,8 +3044,8 @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts) registers needs to be allocated to store a constant. If 'free_or_dead' is non-zero, subsequently release the temporary; if it is positive, the temp is dead; if it is negative, the temp is free. */ -static void temp_sync(TCGContext *s, TCGTemp *ts, - TCGRegSet allocated_regs, int free_or_dead) +static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, + TCGRegSet preferred_regs, int free_or_dead) { if (ts->fixed_reg) { return; @@ -2776,7 +3065,7 @@ static void temp_sync(TCGContext *s, TCGTemp *ts, break; } temp_load(s, ts, tcg_target_available_regs[ts->type], - allocated_regs); + allocated_regs, preferred_regs); /* fallthrough */ case TEMP_VAL_REG: @@ -2803,35 +3092,76 @@ static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) { TCGTemp *ts = s->reg_to_temp[reg]; if (ts != NULL) { - temp_sync(s, ts, allocated_regs, -1); + temp_sync(s, ts, allocated_regs, 0, -1); } } -/* Allocate a register belonging to reg1 & ~reg2 */ -static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, - TCGRegSet allocated_regs, bool rev) +/** + * tcg_reg_alloc: + * @required_regs: Set of registers in which we must allocate. + * @allocated_regs: Set of registers which must be avoided. + * @preferred_regs: Set of registers we should prefer. + * @rev: True if we search the registers in "indirect" order. + * + * The allocated register must be in @required_regs & ~@allocated_regs, + * but if we can put it in @preferred_regs we may save a move later. + */ +static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, + TCGRegSet allocated_regs, + TCGRegSet preferred_regs, bool rev) { - int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); + int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); + TCGRegSet reg_ct[2]; const int *order; - TCGReg reg; - TCGRegSet reg_ct; - reg_ct = desired_regs & ~allocated_regs; + reg_ct[1] = required_regs & ~allocated_regs; + tcg_debug_assert(reg_ct[1] != 0); + reg_ct[0] = reg_ct[1] & preferred_regs; + + /* Skip the preferred_regs option if it cannot be satisfied, + or if the preference made no difference. */ + f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; + order = rev ? 
indirect_reg_alloc_order : tcg_target_reg_alloc_order; - /* first try free registers */ - for(i = 0; i < n; i++) { - reg = order[i]; - if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) - return reg; + /* Try free registers, preferences first. */ + for (j = f; j < 2; j++) { + TCGRegSet set = reg_ct[j]; + + if (tcg_regset_single(set)) { + /* One register in the set. */ + TCGReg reg = tcg_regset_first(set); + if (s->reg_to_temp[reg] == NULL) { + return reg; + } + } else { + for (i = 0; i < n; i++) { + TCGReg reg = order[i]; + if (s->reg_to_temp[reg] == NULL && + tcg_regset_test_reg(set, reg)) { + return reg; + } + } + } } - /* XXX: do better spill choice */ - for(i = 0; i < n; i++) { - reg = order[i]; - if (tcg_regset_test_reg(reg_ct, reg)) { + /* We must spill something. */ + for (j = f; j < 2; j++) { + TCGRegSet set = reg_ct[j]; + + if (tcg_regset_single(set)) { + /* One register in the set. */ + TCGReg reg = tcg_regset_first(set); tcg_reg_free(s, reg, allocated_regs); return reg; + } else { + for (i = 0; i < n; i++) { + TCGReg reg = order[i]; + if (tcg_regset_test_reg(set, reg)) { + tcg_reg_free(s, reg, allocated_regs); + return reg; + } + } } } @@ -2841,7 +3171,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, /* Make sure the temporary is in a register. If needed, allocate the register from DESIRED while avoiding ALLOCATED. */ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, - TCGRegSet allocated_regs) + TCGRegSet allocated_regs, TCGRegSet preferred_regs) { TCGReg reg; @@ -2849,12 +3179,14 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, case TEMP_VAL_REG: return; case TEMP_VAL_CONST: - reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); + reg = tcg_reg_alloc(s, desired_regs, allocated_regs, + preferred_regs, ts->indirect_base); tcg_out_movi(s, ts->type, reg, ts->val); ts->mem_coherent = 0; break; case TEMP_VAL_MEM: - reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); + reg = tcg_reg_alloc(s, desired_regs, allocated_regs, + preferred_regs, ts->indirect_base); tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); ts->mem_coherent = 1; break; @@ -2924,7 +3256,8 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) } static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, - tcg_target_ulong val, TCGLifeData arg_life) + tcg_target_ulong val, TCGLifeData arg_life, + TCGRegSet preferred_regs) { if (ots->fixed_reg) { /* For fixed registers, we do not do any constant propagation. 
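The preference-aware tcg_reg_alloc above searches in a fixed order: free registers in the preferred subset, free registers in the full required set, then the same two sets again with spilling. A compact sketch of that ordering, using an assumed pick_reg helper and lowest-set-bit selection instead of the target's allocation-order array:

    #include <stdint.h>

    /* 'free' is a bitmask of registers not currently holding a temp.
     * Returns the chosen register number, or -1 if 'required' is empty. */
    static int pick_reg(uint32_t required, uint32_t preferred, uint32_t free)
    {
        const uint32_t sets[2] = { required & preferred, required };

        for (int spill = 0; spill < 2; spill++) {   /* pass 0: free regs only */
            for (int j = 0; j < 2; j++) {           /* j == 0: preferred set  */
                uint32_t s = sets[j] & (spill ? ~0u : free);
                if (s) {
                    return __builtin_ctz(s);
                }
            }
        }
        return -1;
    }

As in the real allocator, landing in the preferred subset merely saves a later move; correctness only requires staying inside the required set.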
*/ @@ -2940,7 +3273,7 @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, ots->val = val; ots->mem_coherent = 0; if (NEED_SYNC_ARG(0)) { - temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); + temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); } else if (IS_DEAD_ARG(0)) { temp_dead(s, ots); } @@ -2951,17 +3284,18 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) TCGTemp *ots = arg_temp(op->args[0]); tcg_target_ulong val = op->args[1]; - tcg_reg_alloc_do_movi(s, ots, val, op->life); + tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); } static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; - TCGRegSet allocated_regs; + TCGRegSet allocated_regs, preferred_regs; TCGTemp *ts, *ots; TCGType otype, itype; allocated_regs = s->reserved_regs; + preferred_regs = op->output_pref[0]; ots = arg_temp(op->args[0]); ts = arg_temp(op->args[1]); @@ -2975,7 +3309,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) if (IS_DEAD_ARG(1)) { temp_dead(s, ts); } - tcg_reg_alloc_do_movi(s, ots, val, arg_life); + tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); return; } @@ -2984,7 +3318,8 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) the SOURCE value into its own register first, that way we don't have to reload SOURCE the next time it is used. */ if (ts->val_type == TEMP_VAL_MEM) { - temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); + temp_load(s, ts, tcg_target_available_regs[itype], + allocated_regs, preferred_regs); } tcg_debug_assert(ts->val_type == TEMP_VAL_REG); @@ -3014,7 +3349,8 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) input one. */ tcg_regset_set_reg(allocated_regs, ts->reg); ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], - allocated_regs, ots->indirect_base); + allocated_regs, preferred_regs, + ots->indirect_base); } tcg_out_mov(s, otype, ots->reg, ts->reg); } @@ -3022,7 +3358,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) ots->mem_coherent = 0; s->reg_to_temp[ots->reg] = ots; if (NEED_SYNC_ARG(0)) { - temp_sync(s, ots, allocated_regs, 0); + temp_sync(s, ots, allocated_regs, 0, 0); } } } @@ -3054,6 +3390,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* satisfy input constraints */ for (k = 0; k < nb_iargs; k++) { + TCGRegSet i_preferred_regs, o_preferred_regs; + i = def->sorted_args[nb_oargs + k]; arg = op->args[i]; arg_ct = &def->args_ct[i]; @@ -3064,17 +3402,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* constant is OK for instruction */ const_args[i] = 1; new_args[i] = ts->val; - goto iarg_end; + continue; } - temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); - + i_preferred_regs = o_preferred_regs = 0; if (arg_ct->ct & TCG_CT_IALIAS) { + o_preferred_regs = op->output_pref[arg_ct->alias_index]; if (ts->fixed_reg) { /* if fixed register, we must allocate a new register if the alias is not the same register */ - if (arg != op->args[arg_ct->alias_index]) + if (arg != op->args[arg_ct->alias_index]) { goto allocate_in_reg; + } } else { /* if the input is aliased to an output and if it is not dead after the instruction, we must allocate @@ -3082,33 +3421,42 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) if (!IS_DEAD_ARG(i)) { goto allocate_in_reg; } + /* check if the current register has already been allocated for another input aliased to an output */ - int k2, i2; - for (k2 = 0 ; k2 < k ; k2++) { - i2 = 
def->sorted_args[nb_oargs + k2]; - if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && - (new_args[i2] == ts->reg)) { - goto allocate_in_reg; + if (ts->val_type == TEMP_VAL_REG) { + int k2, i2; + reg = ts->reg; + for (k2 = 0 ; k2 < k ; k2++) { + i2 = def->sorted_args[nb_oargs + k2]; + if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && + reg == new_args[i2]) { + goto allocate_in_reg; + } } } + i_preferred_regs = o_preferred_regs; } } + + temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs); reg = ts->reg; + if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { /* nothing to do : the constraint is satisfied */ } else { allocate_in_reg: /* allocate a new register matching the constraint and move the temporary register into it */ + temp_load(s, ts, tcg_target_available_regs[ts->type], + i_allocated_regs, 0); reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, - ts->indirect_base); + o_preferred_regs, ts->indirect_base); tcg_out_mov(s, ts->type, reg, ts->reg); } new_args[i] = reg; const_args[i] = 0; tcg_regset_set_reg(i_allocated_regs, reg); - iarg_end: ; } /* mark dead temporaries and free the associated registers */ @@ -3147,7 +3495,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } else if (arg_ct->ct & TCG_CT_NEWREG) { reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs | o_allocated_regs, - ts->indirect_base); + op->output_pref[k], ts->indirect_base); } else { /* if fixed register, we try to use it */ reg = ts->reg; @@ -3156,7 +3504,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) goto oarg_end; } reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, - ts->indirect_base); + op->output_pref[k], ts->indirect_base); } tcg_regset_set_reg(o_allocated_regs, reg); /* if a fixed register is used, then a move will be done afterwards */ @@ -3192,7 +3540,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) tcg_out_mov(s, ts->type, ts->reg, reg); } if (NEED_SYNC_ARG(i)) { - temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); + temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); } else if (IS_DEAD_ARG(i)) { temp_dead(s, ts); } @@ -3248,7 +3596,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) if (arg != TCG_CALL_DUMMY_ARG) { ts = arg_temp(arg); temp_load(s, ts, tcg_target_available_regs[ts->type], - s->reserved_regs); + s->reserved_regs, 0); tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); } #ifndef TCG_TARGET_STACK_GROWSUP @@ -3263,17 +3611,18 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) if (arg != TCG_CALL_DUMMY_ARG) { ts = arg_temp(arg); reg = tcg_target_call_iarg_regs[i]; - tcg_reg_free(s, reg, allocated_regs); if (ts->val_type == TEMP_VAL_REG) { if (ts->reg != reg) { + tcg_reg_free(s, reg, allocated_regs); tcg_out_mov(s, ts->type, reg, ts->reg); } } else { TCGRegSet arg_set = 0; + tcg_reg_free(s, reg, allocated_regs); tcg_regset_set_reg(arg_set, reg); - temp_load(s, ts, arg_set, allocated_regs); + temp_load(s, ts, arg_set, allocated_regs, 0); } tcg_regset_set_reg(allocated_regs, reg); @@ -3326,7 +3675,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) ts->mem_coherent = 0; s->reg_to_temp[reg] = ts; if (NEED_SYNC_ARG(i)) { - temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); + temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); } else if (IS_DEAD_ARG(i)) { temp_dead(s, ts); } @@ -3476,7 +3825,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) && qemu_log_in_addr_range(tb->pc))) { qemu_log_lock(); qemu_log("OP:\n"); - tcg_dump_ops(s); + tcg_dump_ops(s, false); qemu_log("\n"); 
qemu_log_unlock(); } @@ -3495,6 +3844,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) atomic_set(&prof->la_time, prof->la_time - profile_getclock()); #endif + reachable_code_pass(s); liveness_pass_1(s); if (s->nb_indirects > 0) { @@ -3503,7 +3853,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) && qemu_log_in_addr_range(tb->pc))) { qemu_log_lock(); qemu_log("OP before indirect lowering:\n"); - tcg_dump_ops(s); + tcg_dump_ops(s, false); qemu_log("\n"); qemu_log_unlock(); } @@ -3524,7 +3874,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) && qemu_log_in_addr_range(tb->pc))) { qemu_log_lock(); qemu_log("OP after optimization and liveness analysis:\n"); - tcg_dump_ops(s); + tcg_dump_ops(s, true); qemu_log("\n"); qemu_log_unlock(); } diff --git a/tcg/tcg.h b/tcg/tcg.h index ade692fdf5..3a629991ca 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -244,7 +244,8 @@ typedef struct TCGRelocation { typedef struct TCGLabel { unsigned has_value : 1; - unsigned id : 31; + unsigned id : 15; + unsigned refs : 16; union { uintptr_t value; tcg_insn_unit *value_ptr; @@ -462,11 +463,13 @@ typedef TCGv_ptr TCGv_env; /* call flags */ /* Helper does not read globals (either directly or through an exception). It implies TCG_CALL_NO_WRITE_GLOBALS. */ -#define TCG_CALL_NO_READ_GLOBALS 0x0010 +#define TCG_CALL_NO_READ_GLOBALS 0x0001 /* Helper does not write globals */ -#define TCG_CALL_NO_WRITE_GLOBALS 0x0020 +#define TCG_CALL_NO_WRITE_GLOBALS 0x0002 /* Helper can be safely suppressed if the return value is not used. */ -#define TCG_CALL_NO_SIDE_EFFECTS 0x0040 +#define TCG_CALL_NO_SIDE_EFFECTS 0x0004 +/* Helper is QEMU_NORETURN. */ +#define TCG_CALL_NO_RETURN 0x0008 /* convenience version of most used call flags */ #define TCG_CALL_NO_RWG TCG_CALL_NO_READ_GLOBALS @@ -616,6 +619,9 @@ typedef struct TCGOp { /* Arguments for the opcode. */ TCGArg args[MAX_OPC_PARAM]; + + /* Register preferences for the output(s). */ + TCGRegSet output_pref[2]; } TCGOp; #define TCGOP_CALLI(X) (X)->param1 @@ -1024,20 +1030,22 @@ typedef struct TCGArgConstraint { /* Bits for TCGOpDef->flags, 8 bits available. */ enum { + /* Instruction exits the translation block. */ + TCG_OPF_BB_EXIT = 0x01, /* Instruction defines the end of a basic block. */ - TCG_OPF_BB_END = 0x01, + TCG_OPF_BB_END = 0x02, /* Instruction clobbers call registers and potentially update globals. */ - TCG_OPF_CALL_CLOBBER = 0x02, + TCG_OPF_CALL_CLOBBER = 0x04, /* Instruction has side effects: it cannot be removed if its outputs are not used, and might trigger exceptions. */ - TCG_OPF_SIDE_EFFECTS = 0x04, + TCG_OPF_SIDE_EFFECTS = 0x08, /* Instruction operands are 64-bits (otherwise 32-bits). */ - TCG_OPF_64BIT = 0x08, + TCG_OPF_64BIT = 0x10, /* Instruction is optional and not implemented by the host, or insn is generic and should not be implemened by the host. */ - TCG_OPF_NOT_PRESENT = 0x10, + TCG_OPF_NOT_PRESENT = 0x20, /* Instruction operands are vectors. 
*/ - TCG_OPF_VECTOR = 0x20, + TCG_OPF_VECTOR = 0x40, }; typedef struct TCGOpDef { @@ -1076,9 +1084,6 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); void tcg_optimize(TCGContext *s); -/* only used for debugging purposes */ -void tcg_dump_ops(TCGContext *s); - TCGv_i32 tcg_const_i32(int32_t val); TCGv_i64 tcg_const_i64(int64_t val); TCGv_i32 tcg_const_local_i32(int32_t val); diff --git a/tests/acceptance/virtio_version.py b/tests/acceptance/virtio_version.py new file mode 100644 index 0000000000..ce990250d8 --- /dev/null +++ b/tests/acceptance/virtio_version.py @@ -0,0 +1,176 @@ +""" +Check compatibility of virtio device types +""" +# Copyright (c) 2018 Red Hat, Inc. +# +# Author: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts")) +from qemu import QEMUMachine +from avocado_qemu import Test + +# Virtio Device IDs: +VIRTIO_NET = 1 +VIRTIO_BLOCK = 2 +VIRTIO_CONSOLE = 3 +VIRTIO_RNG = 4 +VIRTIO_BALLOON = 5 +VIRTIO_RPMSG = 7 +VIRTIO_SCSI = 8 +VIRTIO_9P = 9 +VIRTIO_RPROC_SERIAL = 11 +VIRTIO_CAIF = 12 +VIRTIO_GPU = 16 +VIRTIO_INPUT = 18 +VIRTIO_VSOCK = 19 +VIRTIO_CRYPTO = 20 + +PCI_VENDOR_ID_REDHAT_QUMRANET = 0x1af4 + +# Device IDs for legacy/transitional devices: +PCI_LEGACY_DEVICE_IDS = { + VIRTIO_NET: 0x1000, + VIRTIO_BLOCK: 0x1001, + VIRTIO_BALLOON: 0x1002, + VIRTIO_CONSOLE: 0x1003, + VIRTIO_SCSI: 0x1004, + VIRTIO_RNG: 0x1005, + VIRTIO_9P: 0x1009, + VIRTIO_VSOCK: 0x1012, +} + +def pci_modern_device_id(virtio_devid): + return virtio_devid + 0x1040 + +def devtype_implements(vm, devtype, implements): + return devtype in [d['name'] for d in vm.command('qom-list-types', implements=implements)] + +def get_pci_interfaces(vm, devtype): + interfaces = ('pci-express-device', 'conventional-pci-device') + return [i for i in interfaces if devtype_implements(vm, devtype, i)] + +class VirtioVersionCheck(Test): + """ + Check if virtio-version-specific device types result in the + same device tree created by `disable-modern` and + `disable-legacy`. 
+ + :avocado: enable + :avocado: tags=x86_64 + """ + + # just in case there are failures, show larger diff: + maxDiff = 4096 + + def run_device(self, devtype, opts=None, machine='pc'): + """ + Run QEMU with `-device DEVTYPE`, return device info from `query-pci` + """ + with QEMUMachine(self.qemu_bin) as vm: + vm.set_machine(machine) + if opts: + devtype += ',' + opts + vm.add_args('-device', '%s,id=devfortest' % (devtype)) + vm.add_args('-S') + vm.launch() + + pcibuses = vm.command('query-pci') + alldevs = [dev for bus in pcibuses for dev in bus['devices']] + devfortest = [dev for dev in alldevs + if dev['qdev_id'] == 'devfortest'] + return devfortest[0], get_pci_interfaces(vm, devtype) + + + def assert_devids(self, dev, devid, non_transitional=False): + self.assertEqual(dev['id']['vendor'], PCI_VENDOR_ID_REDHAT_QUMRANET) + self.assertEqual(dev['id']['device'], devid) + if non_transitional: + self.assertTrue(0x1040 <= dev['id']['device'] <= 0x107f) + self.assertGreaterEqual(dev['id']['subsystem'], 0x40) + + def check_all_variants(self, qemu_devtype, virtio_devid): + """Check if a virtio device type and its variants behave as expected""" + # Force modern mode: + dev_modern, _ = self.run_device(qemu_devtype, + 'disable-modern=off,disable-legacy=on') + self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), + non_transitional=True) + + # <prefix>-non-transitional device types should be 100% equivalent to + # <prefix>,disable-modern=off,disable-legacy=on + dev_1_0, nt_ifaces = self.run_device('%s-non-transitional' % (qemu_devtype)) + self.assertEqual(dev_modern, dev_1_0) + + # Force transitional mode: + dev_trans, _ = self.run_device(qemu_devtype, + 'disable-modern=off,disable-legacy=off') + self.assert_devids(dev_trans, PCI_LEGACY_DEVICE_IDS[virtio_devid]) + + # Force legacy mode: + dev_legacy, _ = self.run_device(qemu_devtype, + 'disable-modern=on,disable-legacy=off') + self.assert_devids(dev_legacy, PCI_LEGACY_DEVICE_IDS[virtio_devid]) + + # No options: default to transitional on PC machine-type: + no_opts_pc, generic_ifaces = self.run_device(qemu_devtype) + self.assertEqual(dev_trans, no_opts_pc) + + #TODO: check if plugging on a PCI Express bus will make the + # device non-transitional + #no_opts_q35 = self.run_device(qemu_devtype, machine='q35') + #self.assertEqual(dev_modern, no_opts_q35) + + # <prefix>-transitional device types should be 100% equivalent to + # <prefix>,disable-modern=off,disable-legacy=off + dev_trans, trans_ifaces = self.run_device('%s-transitional' % (qemu_devtype)) + self.assertEqual(dev_trans, dev_trans) + + # ensure the interface information is correct: + self.assertIn('conventional-pci-device', generic_ifaces) + self.assertIn('pci-express-device', generic_ifaces) + + self.assertIn('conventional-pci-device', nt_ifaces) + self.assertIn('pci-express-device', nt_ifaces) + + self.assertIn('conventional-pci-device', trans_ifaces) + self.assertNotIn('pci-express-device', trans_ifaces) + + + def test_conventional_devs(self): + self.check_all_variants('virtio-net-pci', VIRTIO_NET) + # virtio-blk requires 'driver' parameter + #self.check_all_variants('virtio-blk-pci', VIRTIO_BLOCK) + self.check_all_variants('virtio-serial-pci', VIRTIO_CONSOLE) + self.check_all_variants('virtio-rng-pci', VIRTIO_RNG) + self.check_all_variants('virtio-balloon-pci', VIRTIO_BALLOON) + self.check_all_variants('virtio-scsi-pci', VIRTIO_SCSI) + # virtio-9p requires 'fsdev' parameter + #self.check_all_variants('virtio-9p-pci', VIRTIO_9P) + + def check_modern_only(self, qemu_devtype, 
virtio_devid): + """Check if a modern-only virtio device type behaves as expected""" + # Force modern mode: + dev_modern, _ = self.run_device(qemu_devtype, + 'disable-modern=off,disable-legacy=on') + self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), + non_transitional=True) + + # No options: should be modern anyway + dev_no_opts, ifaces = self.run_device(qemu_devtype) + self.assertEqual(dev_modern, dev_no_opts) + + self.assertIn('conventional-pci-device', ifaces) + self.assertIn('pci-express-device', ifaces) + + def test_modern_only_devs(self): + self.check_modern_only('virtio-vga', VIRTIO_GPU) + self.check_modern_only('virtio-gpu-pci', VIRTIO_GPU) + self.check_modern_only('virtio-mouse-pci', VIRTIO_INPUT) + self.check_modern_only('virtio-tablet-pci', VIRTIO_INPUT) + self.check_modern_only('virtio-keyboard-pci', VIRTIO_INPUT) diff --git a/tests/acpi-utils.c b/tests/acpi-utils.c index 6dc8ca1a8c..17abcc43a4 100644 --- a/tests/acpi-utils.c +++ b/tests/acpi-utils.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include <glib/gstdio.h> #include "qemu-common.h" -#include "hw/smbios/smbios.h" #include "qemu/bitmap.h" #include "acpi-utils.h" #include "boot-sector.h" @@ -52,15 +51,44 @@ uint32_t acpi_find_rsdp_address(QTestState *qts) return off; } -void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, - AcpiRsdpDescriptor *rsdp_table) +uint32_t acpi_get_rsdt_address(uint8_t *rsdp_table) { - ACPI_READ_FIELD(qts, rsdp_table->signature, addr); - ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR "); - - ACPI_READ_FIELD(qts, rsdp_table->checksum, addr); - ACPI_READ_ARRAY(qts, rsdp_table->oem_id, addr); - ACPI_READ_FIELD(qts, rsdp_table->revision, addr); - ACPI_READ_FIELD(qts, rsdp_table->rsdt_physical_address, addr); - ACPI_READ_FIELD(qts, rsdp_table->length, addr); + uint32_t rsdt_physical_address; + + memcpy(&rsdt_physical_address, &rsdp_table[16 /* RsdtAddress offset */], 4); + return le32_to_cpu(rsdt_physical_address); +} + +uint64_t acpi_get_xsdt_address(uint8_t *rsdp_table) +{ + uint64_t xsdt_physical_address; + uint8_t revision = rsdp_table[15 /* Revision offset */]; + + /* We must have revision 2 if we're looking for an XSDT pointer */ + g_assert(revision == 2); + + memcpy(&xsdt_physical_address, &rsdp_table[24 /* XsdtAddress offset */], 8); + return le64_to_cpu(xsdt_physical_address); +} + +void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, uint8_t *rsdp_table) +{ + uint8_t revision; + + /* Read mandatory revision 0 table data (20 bytes) first */ + qtest_memread(qts, addr, rsdp_table, 20); + revision = rsdp_table[15 /* Revision offset */]; + + switch (revision) { + case 0: /* ACPI 1.0 RSDP */ + break; + case 2: /* ACPI 2.0+ RSDP */ + /* Read the rest of the RSDP table */ + qtest_memread(qts, addr + 20, rsdp_table + 20, 16); + break; + default: + g_assert_not_reached(); + } + + ACPI_ASSERT_CMP64(*((uint64_t *)(rsdp_table)), "RSD PTR "); } diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h index 791bd5410e..c5b0e12aa2 100644 --- a/tests/acpi-utils.h +++ b/tests/acpi-utils.h @@ -74,7 +74,8 @@ typedef struct { uint8_t acpi_calc_checksum(const uint8_t *data, int len); uint32_t acpi_find_rsdp_address(QTestState *qts); -void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, - AcpiRsdpDescriptor *rsdp_table); +uint32_t acpi_get_rsdt_address(uint8_t *rsdp_table); +uint64_t acpi_get_xsdt_address(uint8_t *rsdp_table); +void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, uint8_t *rsdp_table); #endif /* TEST_ACPI_UTILS_H */ diff --git a/tests/bios-tables-test.c 
index bac4b0171b..d455b2abfc 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -13,7 +13,7 @@
 #include "qemu/osdep.h"
 #include <glib/gstdio.h>
 #include "qemu-common.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "qemu/bitmap.h"
 #include "acpi-utils.h"
 #include "boot-sector.h"
@@ -27,7 +27,7 @@ typedef struct {
     const char *machine;
     const char *variant;
     uint32_t rsdp_addr;
-    AcpiRsdpDescriptor rsdp_table;
+    uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */];
     AcpiRsdtDescriptorRev1 rsdt_table;
     uint32_t dsdt_addr;
     uint32_t facs_addr;
@@ -86,19 +86,31 @@ static void test_acpi_rsdp_address(test_data *data)
 
 static void test_acpi_rsdp_table(test_data *data)
 {
-    AcpiRsdpDescriptor *rsdp_table = &data->rsdp_table;
+    uint8_t *rsdp_table = data->rsdp_table, revision;
     uint32_t addr = data->rsdp_addr;
 
     acpi_parse_rsdp_table(data->qts, addr, rsdp_table);
+    revision = rsdp_table[15 /* Revision offset */];
 
-    /* rsdp checksum is not for the whole table, but for the first 20 bytes */
-    g_assert(!acpi_calc_checksum((uint8_t *)rsdp_table, 20));
+    switch (revision) {
+    case 0: /* ACPI 1.0 RSDP */
+        /* With rev 1, checksum is only for the first 20 bytes */
+        g_assert(!acpi_calc_checksum(rsdp_table, 20));
+        break;
+    case 2: /* ACPI 2.0+ RSDP */
+        /* With revision 2, we have 2 checksums */
+        g_assert(!acpi_calc_checksum(rsdp_table, 20));
+        g_assert(!acpi_calc_checksum(rsdp_table, 36));
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static void test_acpi_rsdt_table(test_data *data)
 {
     AcpiRsdtDescriptorRev1 *rsdt_table = &data->rsdt_table;
-    uint32_t addr = le32_to_cpu(data->rsdp_table.rsdt_physical_address);
+    uint32_t addr = acpi_get_rsdt_address(data->rsdp_table);
     uint32_t *tables;
     int tables_nr;
     uint8_t checksum;
diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c
index f4a677d238..668f00144e 100644
--- a/tests/cpu-plug-test.c
+++ b/tests/cpu-plug-test.c
@@ -157,9 +157,7 @@ static void add_pc_test_case(const char *mname)
         (strcmp(mname, "pc-0.15") == 0) ||
         (strcmp(mname, "pc-0.14") == 0) ||
         (strcmp(mname, "pc-0.13") == 0) ||
-        (strcmp(mname, "pc-0.12") == 0) ||
-        (strcmp(mname, "pc-0.11") == 0) ||
-        (strcmp(mname, "pc-0.10") == 0)) {
+        (strcmp(mname, "pc-0.12") == 0)) {
         path = g_strdup_printf("cpu-plug/%s/init/%ux%ux%u&maxcpus=%u",
                                mname, data->sockets, data->cores,
                                data->threads, data->maxcpus);
diff --git a/tests/fp/platform.h b/tests/fp/platform.h
index c20ba70baa..f8c423dde3 100644
--- a/tests/fp/platform.h
+++ b/tests/fp/platform.h
@@ -29,7 +29,6 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
-#include "config-host.h"
 
 #ifndef HOST_WORDS_BIGENDIAN
 #define LITTLEENDIAN 1
diff --git a/tests/tpm-util.h b/tests/tpm-util.h
index 9e98bc5124..5755698ad2 100644
--- a/tests/tpm-util.h
+++ b/tests/tpm-util.h
@@ -13,7 +13,6 @@
 #ifndef TESTS_TPM_UTIL_H
 #define TESTS_TPM_UTIL_H
 
-#include "qemu/osdep.h"
 #include "io/channel-socket.h"
 
 typedef void (tx_func)(QTestState *s,
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 0cf8d0baca..0033b61f2e 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -29,8 +29,8 @@
 
 #define _FILE_OFFSET_BITS 64
 
-#include "qemu/atomic.h"
 #include "qemu/osdep.h"
+#include "qemu/atomic.h"
 #include "qemu/iov.h"
 #include "standard-headers/linux/virtio_net.h"
 #include "contrib/libvhost-user/libvhost-user.h"
diff --git a/tests/vmgenid-test.c b/tests/vmgenid-test.c
index 84449ce8ae..1c1d435bbd 100644
--- a/tests/vmgenid-test.c
+++ b/tests/vmgenid-test.c
@@ -35,7 +35,7 @@ static uint32_t acpi_find_vgia(QTestState *qts)
 {
     uint32_t rsdp_offset;
     uint32_t guid_offset = 0;
-    AcpiRsdpDescriptor rsdp_table;
+    uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */];
     uint32_t rsdt, rsdt_table_length;
     AcpiRsdtDescriptorRev1 rsdt_table;
     size_t tables_nr;
@@ -52,9 +52,11 @@ static uint32_t acpi_find_vgia(QTestState *qts)
 
     g_assert_cmphex(rsdp_offset, <, RSDP_ADDR_INVALID);
 
-    acpi_parse_rsdp_table(qts, rsdp_offset, &rsdp_table);
+    acpi_parse_rsdp_table(qts, rsdp_offset, rsdp_table);
+
+    rsdt = acpi_get_rsdt_address(rsdp_table);
+    g_assert(rsdt);
 
-    rsdt = le32_to_cpu(rsdp_table.rsdt_physical_address);
     /* read the header */
     ACPI_READ_TABLE_HEADER(qts, &rsdt_table, rsdt);
     ACPI_ASSERT_CMP(rsdt_table.signature, "RSDT");
diff --git a/util/qemu-thread-common.h b/util/qemu-thread-common.h
index a0ea7c0d92..2af6b12085 100644
--- a/util/qemu-thread-common.h
+++ b/util/qemu-thread-common.h
@@ -13,7 +13,6 @@
 #ifndef QEMU_THREAD_COMMON_H
 #define QEMU_THREAD_COMMON_H
 
-#include "qemu/typedefs.h"
 #include "qemu/thread.h"
 #include "trace.h"
 
diff --git a/vl.c b/vl.c
index f5c8ef973c..8353d3c718 100644
--- a/vl.c
+++ b/vl.c
@@ -61,7 +61,7 @@ int main(int argc, char **argv)
 #include "hw/display/vga.h"
 #include "hw/bt.h"
 #include "sysemu/watchdog.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
 #include "hw/acpi/acpi.h"
 #include "hw/xen/xen.h"
 #include "hw/qdev.h"
@@ -1577,6 +1577,8 @@ static NotifierList suspend_notifiers =
     NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
 static NotifierList wakeup_notifiers =
     NOTIFIER_LIST_INITIALIZER(wakeup_notifiers);
+static NotifierList shutdown_notifiers =
+    NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
 static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE);
 
 ShutdownCause qemu_shutdown_requested_get(void)
@@ -1828,6 +1830,12 @@ static void qemu_system_powerdown(void)
     notifier_list_notify(&powerdown_notifiers, NULL);
 }
 
+static void qemu_system_shutdown(ShutdownCause cause)
+{
+    qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause);
+    notifier_list_notify(&shutdown_notifiers, &cause);
+}
+
 void qemu_system_powerdown_request(void)
 {
     trace_qemu_system_powerdown_request();
@@ -1840,6 +1848,11 @@ void qemu_register_powerdown_notifier(Notifier *notifier)
     notifier_list_add(&powerdown_notifiers, notifier);
 }
 
+void qemu_register_shutdown_notifier(Notifier *notifier)
+{
+    notifier_list_add(&shutdown_notifiers, notifier);
+}
+
 void qemu_system_debug_request(void)
 {
     debug_requested = 1;
@@ -1867,7 +1880,7 @@ static bool main_loop_should_exit(void)
     request = qemu_shutdown_requested();
     if (request) {
         qemu_kill_report();
-        qapi_event_send_shutdown(shutdown_caused_by_guest(request), request);
+        qemu_system_shutdown(request);
         if (no_shutdown) {
             vm_stop(RUN_STATE_SHUTDOWN);
         } else {