From 97553976dde2181be3ff95863905c4e34776ae70 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:26 +0100 Subject: ivshmem: Add missing newlines to debug printfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-12-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 1838bc8506..11cbc039b7 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -568,10 +568,10 @@ static void setup_interrupt(IVShmemState *s, int vector) IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); if (!with_irqfd) { - IVSHMEM_DPRINTF("with eventfd"); + IVSHMEM_DPRINTF("with eventfd\n"); watch_vector_notifier(s, n, vector); } else if (msix_enabled(pdev)) { - IVSHMEM_DPRINTF("with irqfd"); + IVSHMEM_DPRINTF("with irqfd\n"); if (ivshmem_add_kvm_msi_virq(s, vector) < 0) { return; } @@ -582,7 +582,7 @@ static void setup_interrupt(IVShmemState *s, int vector) } } else { /* it will be delayed until msix is enabled, in write_config */ - IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled"); + IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); } } -- cgit 1.4.1 From a4fa93bf20a4be3377df6ac9c9d63cccc31ab68c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:27 +0100 Subject: ivshmem: Compile debug prints unconditionally to prevent bit-rot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-13-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 11cbc039b7..6ac8c0d707 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -48,13 +48,13 @@ #define IVSHMEM_REG_BAR_SIZE 0x100 -//#define DEBUG_IVSHMEM -#ifdef DEBUG_IVSHMEM -#define IVSHMEM_DPRINTF(fmt, ...) \ - do {printf("IVSHMEM: " fmt, ## __VA_ARGS__); } while (0) -#else -#define IVSHMEM_DPRINTF(fmt, ...) -#endif +#define IVSHMEM_DEBUG 0 +#define IVSHMEM_DPRINTF(fmt, ...) \ + do { \ + if (IVSHMEM_DEBUG) { \ + printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) #define TYPE_IVSHMEM "ivshmem" #define IVSHMEM(obj) \ -- cgit 1.4.1 From e64befe929cc4c4a53a79ff469b1b52c210a3468 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:28 +0100 Subject: ivshmem: Clean up after commit 9940c32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IVShmemState member eventfd_chr is useless since commit 9940c32. Drop it. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-14-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 6ac8c0d707..9ea79ab01d 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -79,7 +79,6 @@ typedef struct IVShmemState { uint32_t intrmask; uint32_t intrstatus; - CharDriverState **eventfd_chr; CharDriverState *server_chr; Fifo8 incoming_fifo; MemoryRegion ivshmem_mmio; @@ -942,8 +941,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 2, attr, &s->bar); - s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *)); - qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_check_version, ivshmem_event, s); } else { @@ -1007,15 +1004,6 @@ static void pci_ivshmem_exit(PCIDevice *dev) memory_region_del_subregion(&s->bar, &s->ivshmem); } - if (s->eventfd_chr) { - for (i = 0; i < s->vectors; i++) { - if (s->eventfd_chr[i]) { - qemu_chr_free(s->eventfd_chr[i]); - } - } - g_free(s->eventfd_chr); - } - if (s->peers) { for (i = 0; i < s->nb_peers; i++) { close_peer_eventfds(s, i); -- cgit 1.4.1 From c20fc0c3ee1ca83e6f3416acad31439bffed7977 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:29 +0100 Subject: ivshmem: Drop ivshmem_event() stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-15-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 9ea79ab01d..8356399092 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -267,11 +267,6 @@ static int ivshmem_can_receive(void * opaque) return sizeof(int64_t); } -static void ivshmem_event(void *opaque, int event) -{ - IVSHMEM_DPRINTF("ivshmem_event %d\n", event); -} - static void ivshmem_vector_notify(void *opaque) { MSIVector *entry = opaque; @@ -720,7 +715,7 @@ static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size) IVSHMEM_DPRINTF("version check ok, switch to real chardev handler\n"); qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read, - ivshmem_event, s); + NULL, s); } /* Select the MSI-X vectors used by device. @@ -942,7 +937,7 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 2, attr, &s->bar); qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, - ivshmem_check_version, ivshmem_event, s); + ivshmem_check_version, NULL, s); } else { /* just map the file immediately, we're not using a server */ int fd; -- cgit 1.4.1 From 71c265816dd2772f89ebb377381c836dfca09d70 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:30 +0100 Subject: ivshmem: Don't destroy the chardev on version mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yes, the chardev is commonly useless after we read a bad version from it, but destroying it is inappropriate anyway: the user created it, so the user should be able to hold on to it as long as he likes. We don't destroy it on other errors. Screwed up in commit 5105b1d. Stop reading instead. Also note QEMU's behavior in ivshmem-spec.txt. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-16-git-send-email-armbru@redhat.com> --- docs/specs/ivshmem-spec.txt | 3 +++ hw/misc/ivshmem.c | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt index 0e9185a04b..0cd63adff0 100644 --- a/docs/specs/ivshmem-spec.txt +++ b/docs/specs/ivshmem-spec.txt @@ -187,6 +187,9 @@ Each message consists of a single 8 byte little-endian signed number, and may be accompanied by a file descriptor via SCM_RIGHTS. Both client and server close the connection on error. +Note: QEMU currently doesn't close the connection right on error, but +only when the character device is destroyed. + On connect, the server sends the following messages in order: 1. The protocol version number, currently zero. The client should diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 8356399092..0ac0238c7f 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -708,8 +708,7 @@ static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size) if (tmp != -1 || version != IVSHMEM_PROTOCOL_VERSION) { fprintf(stderr, "incompatible version, you are connecting to a ivshmem-" "server using a different protocol please check your setup\n"); - qemu_chr_delete(s->server_chr); - s->server_chr = NULL; + qemu_chr_add_handlers(s->server_chr, NULL, NULL, NULL, s); return; } -- cgit 1.4.1 From 9cf70c52253ccadc137d40eb0de2c0f25a127334 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:31 +0100 Subject: ivshmem: Fix harmless misuse of Error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We reuse errp after passing it host_memory_backend_get_memory(). If both host_memory_backend_get_memory() and the reuse set an error, the reuse will fail the assertion in error_setv(). Fortunately, host_memory_backend_get_memory() can't fail. Pass it &error_abort to make our assumption explicit, and to get the assertion failure in the right place should it become invalid. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-17-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 0ac0238c7f..299cf5bda2 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -842,7 +842,7 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) g_warning("size argument ignored with hostmem"); } - mr = host_memory_backend_get_memory(s->hostmem, errp); + mr = host_memory_backend_get_memory(s->hostmem, &error_abort); s->ivshmem_size = memory_region_size(mr); } else if (s->sizearg == NULL) { s->ivshmem_size = 4 << 20; /* 4 MB default */ @@ -907,7 +907,8 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) IVSHMEM_DPRINTF("using hostmem\n"); - mr = host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem), errp); + mr = host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem), + &error_abort); vmstate_register_ram(mr, DEVICE(s)); memory_region_add_subregion(&s->bar, 0, mr); pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar); @@ -1134,7 +1135,7 @@ static void ivshmem_check_memdev_is_busy(Object *obj, const char *name, { MemoryRegion *mr; - mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp); + mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &error_abort); if (memory_region_is_mapped(mr)) { char *path = object_get_canonical_path_component(val); error_setg(errp, "can't use already busy memdev: %s", path); -- cgit 1.4.1 From d855e2756554583b2b041c9e0a8381bdd202cbc3 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:32 +0100 Subject: ivshmem: Failed realize() can leave migration blocker behind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pci_ivshmem_realize() fails after it created its migration blocker, the blocker is left in place. Fix that by creating it last. Likewise, if it fails after it called fifo8_create(), it leaks fifo memory. Fix that the same way. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-18-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 299cf5bda2..51ad255857 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -825,6 +825,7 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) { IVShmemState *s = IVSHMEM(dev); + Error *err = NULL; uint8_t *pci_conf; uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH; @@ -856,8 +857,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) s->ivshmem_size = size; } - fifo8_create(&s->incoming_fifo, sizeof(int64_t)); - /* IRQFD requires MSI */ if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && !ivshmem_has_feature(s, IVSHMEM_MSI)) { @@ -879,12 +878,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) s->role_val = IVSHMEM_MASTER; /* default */ } - if (s->role_val == IVSHMEM_PEER) { - error_setg(&s->migration_blocker, - "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); - migrate_add_blocker(s->migration_blocker); - } - pci_conf = dev->config; pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; @@ -963,7 +956,19 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) return; } - create_shared_memory_BAR(s, fd, attr, errp); + create_shared_memory_BAR(s, fd, attr, &err); + if (err) { + error_propagate(errp, err); + return; + } + } + + fifo8_create(&s->incoming_fifo, sizeof(int64_t)); + + if (s->role_val == IVSHMEM_PEER) { + error_setg(&s->migration_blocker, + "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); + migrate_add_blocker(s->migration_blocker); } } -- cgit 1.4.1 From 434ad76db531b4bcad241aa4b2abd4bccacca89a Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:33 +0100 Subject: ivshmem: Clean up register callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-19-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 51ad255857..1debce3e2b 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -121,12 +121,10 @@ static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, return (ivs->features & (1 << feature)); } -/* accessing registers - based on rtl8139 */ static void ivshmem_update_irq(IVShmemState *s) { PCIDevice *d = PCI_DEVICE(s); - int isr; - isr = (s->intrstatus & s->intrmask) & 0xffffffff; + uint32_t isr = s->intrstatus & s->intrmask; /* don't print ISR resets */ if (isr) { @@ -134,7 +132,7 @@ static void ivshmem_update_irq(IVShmemState *s) isr ? 1 : 0, s->intrstatus, s->intrmask); } - pci_set_irq(d, (isr != 0)); + pci_set_irq(d, isr != 0); } static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) @@ -142,7 +140,6 @@ static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); s->intrmask = val; - ivshmem_update_irq(s); } @@ -151,7 +148,6 @@ static uint32_t ivshmem_IntrMask_read(IVShmemState *s) uint32_t ret = s->intrmask; IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); - return ret; } @@ -160,7 +156,6 @@ static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); s->intrstatus = val; - ivshmem_update_irq(s); } @@ -170,9 +165,7 @@ static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) /* reading ISR clears all interrupts */ s->intrstatus = 0; - ivshmem_update_irq(s); - return ret; } -- cgit 1.4.1 From 082751e82bed9482fc75863e0b305949bb230c6a Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:34 +0100 Subject: ivshmem: Clean up MSI-X conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are three predicates related to MSI-X: * ivshmem_has_feature(s, IVSHMEM_MSI) is true unless the non-MSI-X variant of the device is selected with msi=off. * msix_present() is true when the device has the PCI capability MSI-X. It's initially false, and becomes true during successful realize of the MSI-X variant of the device. Thus, it's the same as ivshmem_has_feature(s, IVSHMEM_MSI) for realized devices. * msix_enabled() is true when msix_present() is true and guest software has enabled MSI-X. Code that differs between the non-MSI-X and the MSI-X variant of the device needs to be guarded by ivshmem_has_feature(s, IVSHMEM_MSI) or by msix_present(), except the latter works only for realized devices. Code that depends on whether MSI-X is in use needs to be guarded with msix_enabled(). Code review led me to two minor messes: * ivshmem_vector_notify() calls msix_notify() even when !msix_enabled(), unlike most other MSI-X-capable devices. As far as I can tell, msix_notify() does nothing when !msix_enabled(). Add the guard anyway. * Most callers of ivshmem_use_msix() guard it with ivshmem_has_feature(s, IVSHMEM_MSI). Not necessary, because ivshmem_use_msix() does nothing when !msix_present(). That's ivshmem's only use of msix_present(), though. Guard it consistently, and drop the now redundant msix_present() check. While there, rename ivshmem_use_msix() to ivshmem_msix_vector_use(). Signed-off-by: Markus Armbruster Message-Id: <1458066895-20632-20-git-send-email-armbru@redhat.com> Reviewed-by: Marc-André Lureau --- hw/misc/ivshmem.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 1debce3e2b..abcb1c1bfe 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -274,7 +274,9 @@ static void ivshmem_vector_notify(void *opaque) IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); if (ivshmem_has_feature(s, IVSHMEM_MSI)) { - msix_notify(pdev, vector); + if (msix_enabled(pdev)) { + msix_notify(pdev, vector); + } } else { ivshmem_IntrStatus_write(s, 1); } @@ -713,16 +715,11 @@ static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size) /* Select the MSI-X vectors used by device. * ivshmem maps events to vectors statically, so * we just enable all vectors on init and after reset. */ -static void ivshmem_use_msix(IVShmemState * s) +static void ivshmem_msix_vector_use(IVShmemState *s) { PCIDevice *d = PCI_DEVICE(s); int i; - IVSHMEM_DPRINTF("%s, msix present: %d\n", __func__, msix_present(d)); - if (!msix_present(d)) { - return; - } - for (i = 0; i < s->vectors; i++) { msix_vector_use(d, i); } @@ -734,7 +731,9 @@ static void ivshmem_reset(DeviceState *d) s->intrstatus = 0; s->intrmask = 0; - ivshmem_use_msix(s); + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { + ivshmem_msix_vector_use(s); + } } static int ivshmem_setup_interrupts(IVShmemState *s) @@ -748,7 +747,7 @@ static int ivshmem_setup_interrupts(IVShmemState *s) } IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); - ivshmem_use_msix(s); + ivshmem_msix_vector_use(s); } return 0; @@ -1040,9 +1039,8 @@ static int ivshmem_post_load(void *opaque, int version_id) IVShmemState *s = opaque; if (ivshmem_has_feature(s, IVSHMEM_MSI)) { - ivshmem_use_msix(s); + ivshmem_msix_vector_use(s); } - return 0; } @@ -1070,7 +1068,7 @@ static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) if (ivshmem_has_feature(s, IVSHMEM_MSI)) { msix_load(pdev, f); - ivshmem_use_msix(s); + ivshmem_msix_vector_use(s); } else { s->intrstatus = qemu_get_be32(f); s->intrmask = qemu_get_be32(f); -- cgit 1.4.1 From 2d1d422d1145085fdde6a90d70cbbff7103d469b Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:35 +0100 Subject: ivshmem: Leave INTx alone when using MSI-X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ivshmem device can either use MSI-X or legacy INTx for interrupts. With MSI-X enabled, peer interrupt events trigger an MSI as they should. But software can still raise INTx via interrupt status and mask register in BAR 0. This is explicitly prohibited by PCI Local Bus Specification Revision 3.0, section 6.8.3.3: While enabled for MSI or MSI-X operation, a function is prohibited from using its INTx# pin (if implemented) to request service (MSI, MSI-X, and INTx# are mutually exclusive). Fix the device model to leave INTx alone when using MSI-X. Document that we claim to use INTx in config space even when we don't. Unlike other devices, ivshmem does *not* use INTx when configured for MSI-X and MSI-X isn't enabled by software. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Reviewed-by: Paolo Bonzini Message-Id: <1458066895-20632-21-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index abcb1c1bfe..65e3a76870 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -126,6 +126,11 @@ static void ivshmem_update_irq(IVShmemState *s) PCIDevice *d = PCI_DEVICE(s); uint32_t isr = s->intrstatus & s->intrmask; + /* No INTx with msi=on, whether the guest enabled MSI-X or not */ + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { + return; + } + /* don't print ISR resets */ if (isr) { IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", @@ -873,6 +878,10 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) pci_conf = dev->config; pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; + /* + * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a + * bald-faced lie then. But it's a backwards compatible lie. + */ pci_config_set_interrupt_pin(pci_conf, 1); memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, -- cgit 1.4.1 From 3c27969b3e7397bb63f363fc8b8bc0e601542d76 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:36 +0100 Subject: ivshmem: Assert interrupts are set up once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An interrupt is set up when the interrupt's file descriptor is received. Each message applies to the next interrupt vector. Therefore, each vector cannot be set up more than once. ivshmem_add_kvm_msi_virq() half-heartedly tries not to rely on this by doing nothing then, but that's not going to recover from this error should it become possible in the future. watch_vector_notifier() doesn't even try. Simply assert what is the case, so we get alerted if we ever screw it up. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-22-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 65e3a76870..61e21cd80c 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -349,7 +349,7 @@ static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, { int eventfd = event_notifier_get_fd(n); - /* if MSI is supported we need multiple interrupts */ + assert(!s->msi_vectors[vector].pdev); s->msi_vectors[vector].pdev = PCI_DEVICE(s); qemu_set_fd_handler(eventfd, ivshmem_vector_notify, @@ -535,10 +535,7 @@ static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector) int ret; IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); - - if (s->msi_vectors[vector].pdev != NULL) { - return 0; - } + assert(!s->msi_vectors[vector].pdev); ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); if (ret < 0) { -- cgit 1.4.1 From cd9953f720e1d57cfa86bc0882abced45ba96d3c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:37 +0100 Subject: ivshmem: Simplify rejection of invalid peer ID from server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ivshmem_read() processes server messages. These are 64 bit signed integers. -1 is shared memory setup, 16 bit unsigned is a peer ID, anything else is invalid. ivshmem_read() rejects invalid negative messages right away, silently. Invalid positive messages get rejected only in resize_peers(), and ivshmem_read() then prints the rather cryptic message "failed to resize peers array". Extend the first check to cover all invalid messages, make it report "server sent invalid message", and drop the second check. Now resize_peers() can't fail anymore; simplify. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-23-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 61 ++++++++++++++++++++----------------------------------- 1 file changed, 22 insertions(+), 39 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 61e21cd80c..703b3bff4c 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -39,7 +39,7 @@ #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET #define PCI_DEVICE_ID_IVSHMEM 0x1110 -#define IVSHMEM_MAX_PEERS G_MAXUINT16 +#define IVSHMEM_MAX_PEERS UINT16_MAX #define IVSHMEM_IOEVENTFD 0 #define IVSHMEM_MSI 1 @@ -93,7 +93,7 @@ typedef struct IVShmemState { uint32_t ivshmem_64bit; Peer *peers; - int nb_peers; /* how many peers we have space for */ + int nb_peers; /* space in @peers[] */ int vm_id; uint32_t vectors; @@ -451,34 +451,21 @@ static void close_peer_eventfds(IVShmemState *s, int posn) s->peers[posn].nb_eventfds = 0; } -/* this function increase the dynamic storage need to store data about other - * peers */ -static int resize_peers(IVShmemState *s, int new_min_size) +static void resize_peers(IVShmemState *s, int nb_peers) { + int old_nb_peers = s->nb_peers; + int i; - int j, old_size; - - /* limit number of max peers */ - if (new_min_size <= 0 || new_min_size > IVSHMEM_MAX_PEERS) { - return -1; - } - if (new_min_size <= s->nb_peers) { - return 0; - } - - old_size = s->nb_peers; - s->nb_peers = new_min_size; - - IVSHMEM_DPRINTF("bumping storage to %d peers\n", s->nb_peers); + assert(nb_peers > old_nb_peers); + IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); - s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer)); + s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); + s->nb_peers = nb_peers; - for (j = old_size; j < s->nb_peers; j++) { - s->peers[j].eventfds = g_new0(EventNotifier, s->vectors); - s->peers[j].nb_eventfds = 0; + for (i = old_nb_peers; i < nb_peers; i++) { + s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); + s->peers[i].nb_eventfds = 0; } - - return 0; } static bool fifo_update_and_get(IVShmemState *s, const uint8_t *buf, int size, @@ -590,25 +577,21 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size) return; } - if (incoming_posn < -1) { - IVSHMEM_DPRINTF("invalid incoming_posn %" PRId64 "\n", incoming_posn); - return; - } - - /* pick off s->server_chr->msgfd and store it, posn should accompany msg */ incoming_fd = qemu_chr_fe_get_msgfd(s->server_chr); IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", incoming_posn, incoming_fd); - /* make sure we have enough space for this peer */ - if (incoming_posn >= s->nb_peers) { - if (resize_peers(s, incoming_posn + 1) < 0) { - error_report("failed to resize peers array"); - if (incoming_fd != -1) { - close(incoming_fd); - } - return; + if (incoming_posn < -1 || incoming_posn > IVSHMEM_MAX_PEERS) { + error_report("server sent invalid message %" PRId64, + incoming_posn); + if (incoming_fd != -1) { + close(incoming_fd); } + return; + } + + if (incoming_posn >= s->nb_peers) { + resize_peers(s, incoming_posn + 1); } peer = &s->peers[incoming_posn]; -- cgit 1.4.1 From ca0b7566cc2cbd245e804fe03d556b0dbee1fd2e Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:38 +0100 Subject: ivshmem: Disentangle ivshmem_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-24-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 159 +++++++++++++++++++++++++++--------------------------- 1 file changed, 80 insertions(+), 79 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 703b3bff4c..d8d363e9ac 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -564,114 +564,115 @@ static void setup_interrupt(IVShmemState *s, int vector) } } -static void ivshmem_read(void *opaque, const uint8_t *buf, int size) +static void process_msg_shmem(IVShmemState *s, int fd) { - IVShmemState *s = opaque; - int incoming_fd; - int new_eventfd; - int64_t incoming_posn; Error *err = NULL; - Peer *peer; + void *ptr; - if (!fifo_update_and_get_i64(s, buf, size, &incoming_posn)) { + if (memory_region_is_mapped(&s->ivshmem)) { + error_report("shm already initialized"); + close(fd); return; } - incoming_fd = qemu_chr_fe_get_msgfd(s->server_chr); - IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", - incoming_posn, incoming_fd); - - if (incoming_posn < -1 || incoming_posn > IVSHMEM_MAX_PEERS) { - error_report("server sent invalid message %" PRId64, - incoming_posn); - if (incoming_fd != -1) { - close(incoming_fd); - } + if (check_shm_size(s, fd, &err) == -1) { + error_report_err(err); + close(fd); return; } - if (incoming_posn >= s->nb_peers) { - resize_peers(s, incoming_posn + 1); + /* mmap the region and map into the BAR2 */ + ptr = mmap(0, s->ivshmem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (ptr == MAP_FAILED) { + error_report("Failed to mmap shared memory %s", strerror(errno)); + close(fd); + return; } + memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), + "ivshmem.bar2", s->ivshmem_size, ptr); + qemu_set_ram_fd(memory_region_get_ram_addr(&s->ivshmem), fd); + vmstate_register_ram(&s->ivshmem, DEVICE(s)); + memory_region_add_subregion(&s->bar, 0, &s->ivshmem); +} - peer = &s->peers[incoming_posn]; +static void process_msg_disconnect(IVShmemState *s, uint16_t posn) +{ + IVSHMEM_DPRINTF("posn %d has gone away\n", posn); + close_peer_eventfds(s, posn); +} - if (incoming_fd == -1) { - /* if posn is positive and unseen before then this is our posn*/ - if (incoming_posn >= 0 && s->vm_id == -1) { - /* receive our posn */ - s->vm_id = incoming_posn; - } else { - /* otherwise an fd == -1 means an existing peer has gone away */ - IVSHMEM_DPRINTF("posn %" PRId64 " has gone away\n", incoming_posn); - close_peer_eventfds(s, incoming_posn); - } +static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd) +{ + Peer *peer = &s->peers[posn]; + int vector; + + /* + * The N-th connect message for this peer comes with the file + * descriptor for vector N-1. Count messages to find the vector. + */ + if (peer->nb_eventfds >= s->vectors) { + error_report("Too many eventfd received, device has %d vectors", + s->vectors); + close(fd); return; } + vector = peer->nb_eventfds++; - /* if the position is -1, then it's shared memory region fd */ - if (incoming_posn == -1) { - void * map_ptr; - - if (memory_region_is_mapped(&s->ivshmem)) { - error_report("shm already initialized"); - close(incoming_fd); - return; - } - - if (check_shm_size(s, incoming_fd, &err) == -1) { - error_report_err(err); - close(incoming_fd); - return; - } + IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); + event_notifier_init_fd(&peer->eventfds[vector], fd); + fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ - /* mmap the region and map into the BAR2 */ - map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, - incoming_fd, 0); - if (map_ptr == MAP_FAILED) { - error_report("Failed to mmap shared memory %s", strerror(errno)); - close(incoming_fd); - return; - } - memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), - "ivshmem.bar2", s->ivshmem_size, map_ptr); - qemu_set_ram_fd(memory_region_get_ram_addr(&s->ivshmem), - incoming_fd); - vmstate_register_ram(&s->ivshmem, DEVICE(s)); + if (posn == s->vm_id) { + setup_interrupt(s, vector); + } - IVSHMEM_DPRINTF("guest h/w addr = %p, size = %" PRIu64 "\n", - map_ptr, s->ivshmem_size); + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { + ivshmem_add_eventfd(s, posn, vector); + } +} - memory_region_add_subregion(&s->bar, 0, &s->ivshmem); +static void process_msg(IVShmemState *s, int64_t msg, int fd) +{ + IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); + if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { + error_report("server sent invalid message %" PRId64, msg); + close(fd); return; } - /* each peer has an associated array of eventfds, and we keep - * track of how many eventfds received so far */ - /* get a new eventfd: */ - if (peer->nb_eventfds >= s->vectors) { - error_report("Too many eventfd received, device has %d vectors", - s->vectors); - close(incoming_fd); + if (msg == -1) { + process_msg_shmem(s, fd); return; } - new_eventfd = peer->nb_eventfds++; - - /* this is an eventfd for a particular peer VM */ - IVSHMEM_DPRINTF("eventfds[%" PRId64 "][%d] = %d\n", incoming_posn, - new_eventfd, incoming_fd); - event_notifier_init_fd(&peer->eventfds[new_eventfd], incoming_fd); - fcntl_setfl(incoming_fd, O_NONBLOCK); /* msix/irqfd poll non block */ + if (msg >= s->nb_peers) { + resize_peers(s, msg + 1); + } - if (incoming_posn == s->vm_id) { - setup_interrupt(s, new_eventfd); + if (fd >= 0) { + process_msg_connect(s, msg, fd); + } else if (s->vm_id == -1) { + s->vm_id = msg; + } else { + process_msg_disconnect(s, msg); } +} - if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { - ivshmem_add_eventfd(s, incoming_posn, new_eventfd); +static void ivshmem_read(void *opaque, const uint8_t *buf, int size) +{ + IVShmemState *s = opaque; + int fd; + int64_t msg; + + if (!fifo_update_and_get_i64(s, buf, size, &msg)) { + return; } + + fd = qemu_chr_fe_get_msgfd(s->server_chr); + IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); + + process_msg(s, msg, fd); } static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size) -- cgit 1.4.1 From 9db51b4d64ded01536b3851a5a50e484ac2f7899 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:39 +0100 Subject: ivshmem: Plug leaks on unplug, fix peer disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close_peer_eventfds() cleans up three things: ioeventfd triggers if they exist, eventfds, and the array to store them. Commit 98609cd (v1.2.0) fixed it not to clean up ioeventfd triggers when they don't exist (property ioeventfd=off, which is the default). Unfortunately, the fix also made it skip cleanup of the eventfds and the array then. This is a memory and file descriptor leak on unplug. Additionally, the reset of nb_eventfds is skipped. Doesn't matter on unplug. On peer disconnect, however, this permanently wedges the interrupt vectors used for that peer's ID. The eventfds stay behind, but aren't connected to a peer anymore. When the ID gets recycled for a new peer, the new peer's eventfds get assigned to vectors after the old ones. Commonly, the device's number of vectors matches the server's, so the new ones get dropped with a "Too many eventfd received" message. Interrupts either don't work (common case) or go to the wrong vector. Fix by narrowing the conditional to just the ioeventfd trigger cleanup. While there, move the "invalid" peer check to the only caller where it can actually happen, and tighten it to reject own ID. Cc: Paolo Bonzini Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-25-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index d8d363e9ac..c6d5dd5f04 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -428,21 +428,17 @@ static void close_peer_eventfds(IVShmemState *s, int posn) { int i, n; - if (!ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { - return; - } - if (posn < 0 || posn >= s->nb_peers) { - error_report("invalid peer %d", posn); - return; - } - + assert(posn >= 0 && posn < s->nb_peers); n = s->peers[posn].nb_eventfds; - memory_region_transaction_begin(); - for (i = 0; i < n; i++) { - ivshmem_del_eventfd(s, posn, i); + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { + memory_region_transaction_begin(); + for (i = 0; i < n; i++) { + ivshmem_del_eventfd(s, posn, i); + } + memory_region_transaction_commit(); } - memory_region_transaction_commit(); + for (i = 0; i < n; i++) { event_notifier_cleanup(&s->peers[posn].eventfds[i]); } @@ -598,6 +594,10 @@ static void process_msg_shmem(IVShmemState *s, int fd) static void process_msg_disconnect(IVShmemState *s, uint16_t posn) { IVSHMEM_DPRINTF("posn %d has gone away\n", posn); + if (posn >= s->nb_peers || posn == s->vm_id) { + error_report("invalid peer %d", posn); + return; + } close_peer_eventfds(s, posn); } -- cgit 1.4.1 From 3a55fc0f243104998bee5106b121cff257df5d33 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:40 +0100 Subject: ivshmem: Receive shared memory synchronously in realize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When configured for interrupts (property "chardev" given), we receive the shared memory from an ivshmem server. We do so asynchronously after realize() completes, by setting up callbacks with qemu_chr_add_handlers(). Keeping server I/O out of realize() that way avoids delays due to a slow server. This is probably relevant only for hot plug. However, this funny "no shared memory, yet" state of the device also causes a raft of issues that are hard or impossible to work around: * The guest is exposed to this state: when we enter and leave it its shared memory contents is apruptly replaced, and device register IVPosition changes. This is a known issue. We document that guests should not access the shared memory after device initialization until the IVPosition register becomes non-negative. For cold plug, the funny state is unlikely to be visible in practice, because we normally receive the shared memory long before the guest gets around to mess with the device. For hot plug, the timing is tighter, but the relative slowness of PCI device configuration has a good chance to hide the funny state. In either case, guests complying with the documented procedure are safe. * Migration becomes racy. If migration completes before the shared memory setup completes on the source, shared memory contents is silently lost. Fortunately, migration is rather unlikely to win this race. If the shared memory's ramblock arrives at the destination before shared memory setup completes, migration fails. There is no known way for a management application to wait for shared memory setup to complete. All you can do is retry failed migration. You can improve your chances by leaving more time between running the destination QEMU and the migrate command. To mitigate silent memory loss, you need to ensure the server initializes shared memory exactly the same on source and destination. These issues are entirely undocumented so far. I'd expect the server to be almost always fast enough to hide these issues. But then rare catastrophic races are in a way the worst kind. This is way more trouble than I'm willing to take from any device. Kill the funny state by receiving shared memory synchronously in realize(). If your hot plug hangs, go kill your ivshmem server. For easier review, this commit only makes the receive synchronous, it doesn't add the necessary error propagation. Without that, the funny state persists. The next commit will do that, and kill it off for real. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-26-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 68 ++++++++++++++++++++++++++++++++++++---------------- tests/ivshmem-test.c | 26 ++++++-------------- 2 files changed, 55 insertions(+), 39 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index c6d5dd5f04..ad168288ea 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -675,27 +675,45 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size) process_msg(s, msg, fd); } -static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size) +static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd) { - IVShmemState *s = opaque; - int tmp; - int64_t version; + int64_t msg; + int n, ret; + + n = 0; + do { + ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n, + sizeof(msg) - n); + if (ret < 0 && ret != -EINTR) { + /* TODO error handling */ + return INT64_MIN; + } + n += ret; + } while (n < sizeof(msg)); - if (!fifo_update_and_get_i64(s, buf, size, &version)) { - return; - } + *pfd = qemu_chr_fe_get_msgfd(s->server_chr); + return msg; +} - tmp = qemu_chr_fe_get_msgfd(s->server_chr); - if (tmp != -1 || version != IVSHMEM_PROTOCOL_VERSION) { +static void ivshmem_recv_setup(IVShmemState *s) +{ + int64_t msg; + int fd; + + msg = ivshmem_recv_msg(s, &fd); + if (fd != -1 || msg != IVSHMEM_PROTOCOL_VERSION) { fprintf(stderr, "incompatible version, you are connecting to a ivshmem-" "server using a different protocol please check your setup\n"); - qemu_chr_add_handlers(s->server_chr, NULL, NULL, NULL, s); return; } - IVSHMEM_DPRINTF("version check ok, switch to real chardev handler\n"); - qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read, - NULL, s); + /* + * Receive more messages until we got shared memory. + */ + do { + msg = ivshmem_recv_msg(s, &fd); + process_msg(s, msg, fd); + } while (msg != -1); } /* Select the MSI-X vectors used by device. @@ -900,19 +918,29 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", s->server_chr->filename); - if (ivshmem_setup_interrupts(s) < 0) { - error_setg(errp, "failed to initialize interrupts"); - return; - } - /* we allocate enough space for 16 peers and grow as needed */ resize_peers(s, 16); s->vm_id = -1; pci_register_bar(dev, 2, attr, &s->bar); - qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, - ivshmem_check_version, NULL, s); + /* + * Receive setup messages from server synchronously. + * Older versions did it asynchronously, but that creates a + * number of entertaining race conditions. + * TODO Propagate errors! Without that, we still have races + * on errors. + */ + ivshmem_recv_setup(s); + if (memory_region_is_mapped(&s->ivshmem)) { + qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, + ivshmem_read, NULL, s); + } + + if (ivshmem_setup_interrupts(s) < 0) { + error_setg(errp, "failed to initialize interrupts"); + return; + } } else { /* just map the file immediately, we're not using a server */ int fd; diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c index 7b6b957e54..c7f3758df0 100644 --- a/tests/ivshmem-test.c +++ b/tests/ivshmem-test.c @@ -309,35 +309,23 @@ static void test_ivshmem_server(bool msi) ret = ivshmem_server_start(&server); g_assert_cmpint(ret, ==, 0); - setup_vm_with_server(&state1, nvectors, msi); - s1 = &state1; - setup_vm_with_server(&state2, nvectors, msi); - s2 = &state2; - - /* check state before server sends stuff */ - g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff); - g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff); - g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 0x00); - thread.server = &server; ret = pipe(thread.pipe); g_assert_cmpint(ret, ==, 0); thread.thread = g_thread_new("ivshmem-server", server_thread, &thread); g_assert(thread.thread != NULL); - /* waiting for devices to become operational */ - while (g_get_monotonic_time() < end_time) { - g_usleep(1000); - if ((int)in_reg(s1, IVPOSITION) >= 0 && - (int)in_reg(s2, IVPOSITION) >= 0) { - break; - } - } + setup_vm_with_server(&state1, nvectors, msi); + s1 = &state1; + setup_vm_with_server(&state2, nvectors, msi); + s2 = &state2; /* check got different VM ids */ vm1 = in_reg(s1, IVPOSITION); vm2 = in_reg(s2, IVPOSITION); - g_assert_cmpuint(vm1, !=, vm2); + g_assert_cmpint(vm1, >=, 0); + g_assert_cmpint(vm2, >=, 0); + g_assert_cmpint(vm1, !=, vm2); /* check number of MSI-X vectors */ global_qtest = s1->qtest; -- cgit 1.4.1 From 1309cf448a6d88d8a693c15d5b11ad07af2321ab Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:41 +0100 Subject: ivshmem: Propagate errors through ivshmem_recv_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This kills off the funny state described in the previous commit. Simplify ivshmem_io_read() accordingly, and update documentation. Signed-off-by: Markus Armbruster Message-Id: <1458066895-20632-27-git-send-email-armbru@redhat.com> Reviewed-by: Marc-André Lureau --- docs/specs/ivshmem-spec.txt | 20 +++---- hw/misc/ivshmem.c | 129 ++++++++++++++++++++++++++++---------------- qemu-doc.texi | 9 +--- 3 files changed, 95 insertions(+), 63 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt index 0cd63adff0..4c33973552 100644 --- a/docs/specs/ivshmem-spec.txt +++ b/docs/specs/ivshmem-spec.txt @@ -62,11 +62,11 @@ There are two ways to use this device: likely want to write a kernel driver to handle interrupts. Requires the device to be configured for interrupts, obviously. -If the device is configured for interrupts, BAR2 is initially invalid. -It becomes safely accessible only after the ivshmem server provided -the shared memory. Guest software should wait for the IVPosition -register (described below) to become non-negative before accessing -BAR2. +Before QEMU 2.6.0, BAR2 can initially be invalid if the device is +configured for interrupts. It becomes safely accessible only after +the ivshmem server provided the shared memory. Guest software should +wait for the IVPosition register (described below) to become +non-negative before accessing BAR2. The device is not capable to tell guest software whether it is configured for interrupts. @@ -82,7 +82,7 @@ BAR 0 contains the following registers: 4 4 read/write 0 Interrupt Status bit 0: peer interrupt bit 1..31: reserved - 8 4 read-only 0 or -1 IVPosition + 8 4 read-only 0 or ID IVPosition 12 4 write-only N/A Doorbell bit 0..15: vector bit 16..31: peer ID @@ -100,12 +100,14 @@ when an interrupt request from a peer is received. Reading the register clears it. IVPosition Register: if the device is not configured for interrupts, -this is zero. Else, it's -1 for a short while after reset, then -changes to the device's ID (between 0 and 65535). +this is zero. Else, it is the device's ID (between 0 and 65535). + +Before QEMU 2.6.0, the register may read -1 for a short while after +reset. There is no good way for software to find out whether the device is configured for interrupts. A positive IVPosition means interrupts, -but zero could be either. The initial -1 cannot be reliably observed. +but zero could be either. Doorbell Register: writing this register requests to interrupt a peer. The written value's high 16 bits are the ID of the peer to interrupt, diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index ad168288ea..7f439c3b04 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -234,12 +234,7 @@ static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, break; case IVPOSITION: - /* return my VM ID if the memory is mapped */ - if (memory_region_is_mapped(&s->ivshmem)) { - ret = s->vm_id; - } else { - ret = -1; - } + ret = s->vm_id; break; default: @@ -511,7 +506,8 @@ static bool fifo_update_and_get_i64(IVShmemState *s, return false; } -static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector) +static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, + Error **errp) { PCIDevice *pdev = PCI_DEVICE(s); MSIMessage msg = msix_get_message(pdev, vector); @@ -522,22 +518,21 @@ static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector) ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); if (ret < 0) { - error_report("ivshmem: kvm_irqchip_add_msi_route failed"); - return -1; + error_setg(errp, "kvm_irqchip_add_msi_route failed"); + return; } s->msi_vectors[vector].virq = ret; s->msi_vectors[vector].pdev = pdev; - - return 0; } -static void setup_interrupt(IVShmemState *s, int vector) +static void setup_interrupt(IVShmemState *s, int vector, Error **errp) { EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; bool with_irqfd = kvm_msi_via_irqfd_enabled() && ivshmem_has_feature(s, IVSHMEM_MSI); PCIDevice *pdev = PCI_DEVICE(s); + Error *err = NULL; IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); @@ -546,13 +541,16 @@ static void setup_interrupt(IVShmemState *s, int vector) watch_vector_notifier(s, n, vector); } else if (msix_enabled(pdev)) { IVSHMEM_DPRINTF("with irqfd\n"); - if (ivshmem_add_kvm_msi_virq(s, vector) < 0) { + ivshmem_add_kvm_msi_virq(s, vector, &err); + if (err) { + error_propagate(errp, err); return; } if (!msix_is_masked(pdev, vector)) { kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, s->msi_vectors[vector].virq); + /* TODO handle error */ } } else { /* it will be delayed until msix is enabled, in write_config */ @@ -560,19 +558,19 @@ static void setup_interrupt(IVShmemState *s, int vector) } } -static void process_msg_shmem(IVShmemState *s, int fd) +static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) { Error *err = NULL; void *ptr; if (memory_region_is_mapped(&s->ivshmem)) { - error_report("shm already initialized"); + error_setg(errp, "server sent unexpected shared memory message"); close(fd); return; } if (check_shm_size(s, fd, &err) == -1) { - error_report_err(err); + error_propagate(errp, err); close(fd); return; } @@ -580,7 +578,7 @@ static void process_msg_shmem(IVShmemState *s, int fd) /* mmap the region and map into the BAR2 */ ptr = mmap(0, s->ivshmem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (ptr == MAP_FAILED) { - error_report("Failed to mmap shared memory %s", strerror(errno)); + error_setg_errno(errp, errno, "Failed to mmap shared memory"); close(fd); return; } @@ -591,17 +589,19 @@ static void process_msg_shmem(IVShmemState *s, int fd) memory_region_add_subregion(&s->bar, 0, &s->ivshmem); } -static void process_msg_disconnect(IVShmemState *s, uint16_t posn) +static void process_msg_disconnect(IVShmemState *s, uint16_t posn, + Error **errp) { IVSHMEM_DPRINTF("posn %d has gone away\n", posn); if (posn >= s->nb_peers || posn == s->vm_id) { - error_report("invalid peer %d", posn); + error_setg(errp, "invalid peer %d", posn); return; } close_peer_eventfds(s, posn); } -static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd) +static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, + Error **errp) { Peer *peer = &s->peers[posn]; int vector; @@ -611,8 +611,8 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd) * descriptor for vector N-1. Count messages to find the vector. */ if (peer->nb_eventfds >= s->vectors) { - error_report("Too many eventfd received, device has %d vectors", - s->vectors); + error_setg(errp, "Too many eventfd received, device has %d vectors", + s->vectors); close(fd); return; } @@ -623,7 +623,8 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd) fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ if (posn == s->vm_id) { - setup_interrupt(s, vector); + setup_interrupt(s, vector, errp); + /* TODO do we need to handle the error? */ } if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { @@ -631,18 +632,18 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd) } } -static void process_msg(IVShmemState *s, int64_t msg, int fd) +static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) { IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { - error_report("server sent invalid message %" PRId64, msg); + error_setg(errp, "server sent invalid message %" PRId64, msg); close(fd); return; } if (msg == -1) { - process_msg_shmem(s, fd); + process_msg_shmem(s, fd, errp); return; } @@ -651,17 +652,18 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd) } if (fd >= 0) { - process_msg_connect(s, msg, fd); + process_msg_connect(s, msg, fd, errp); } else if (s->vm_id == -1) { s->vm_id = msg; } else { - process_msg_disconnect(s, msg); + process_msg_disconnect(s, msg, errp); } } static void ivshmem_read(void *opaque, const uint8_t *buf, int size) { IVShmemState *s = opaque; + Error *err = NULL; int fd; int64_t msg; @@ -672,10 +674,13 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size) fd = qemu_chr_fe_get_msgfd(s->server_chr); IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); - process_msg(s, msg, fd); + process_msg(s, msg, fd, &err); + if (err) { + error_report_err(err); + } } -static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd) +static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) { int64_t msg; int n, ret; @@ -685,7 +690,7 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd) ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n, sizeof(msg) - n); if (ret < 0 && ret != -EINTR) { - /* TODO error handling */ + error_setg_errno(errp, -ret, "read from server failed"); return INT64_MIN; } n += ret; @@ -695,15 +700,24 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd) return msg; } -static void ivshmem_recv_setup(IVShmemState *s) +static void ivshmem_recv_setup(IVShmemState *s, Error **errp) { + Error *err = NULL; int64_t msg; int fd; - msg = ivshmem_recv_msg(s, &fd); - if (fd != -1 || msg != IVSHMEM_PROTOCOL_VERSION) { - fprintf(stderr, "incompatible version, you are connecting to a ivshmem-" - "server using a different protocol please check your setup\n"); + msg = ivshmem_recv_msg(s, &fd, &err); + if (err) { + error_propagate(errp, err); + return; + } + if (msg != IVSHMEM_PROTOCOL_VERSION) { + error_setg(errp, "server sent version %" PRId64 ", expecting %d", + msg, IVSHMEM_PROTOCOL_VERSION); + return; + } + if (fd != -1) { + error_setg(errp, "server sent invalid version message"); return; } @@ -711,9 +725,25 @@ static void ivshmem_recv_setup(IVShmemState *s) * Receive more messages until we got shared memory. */ do { - msg = ivshmem_recv_msg(s, &fd); - process_msg(s, msg, fd); + msg = ivshmem_recv_msg(s, &fd, &err); + if (err) { + error_propagate(errp, err); + return; + } + process_msg(s, msg, fd, &err); + if (err) { + error_propagate(errp, err); + return; + } } while (msg != -1); + + /* + * This function must either map the shared memory or fail. The + * loop above ensures that: it terminates normally only after it + * successfully processed the server's shared memory message. + * Assert that actually mapped the shared memory: + */ + assert(memory_region_is_mapped(&s->ivshmem)); } /* Select the MSI-X vectors used by device. @@ -763,7 +793,13 @@ static void ivshmem_enable_irqfd(IVShmemState *s) int i; for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { - ivshmem_add_kvm_msi_virq(s, i); + Error *err = NULL; + + ivshmem_add_kvm_msi_virq(s, i, &err); + if (err) { + error_report_err(err); + /* TODO do we need to handle the error? */ + } } if (msix_set_vector_notifiers(pdev, @@ -809,7 +845,7 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, pci_default_write_config(pdev, address, val, len); is_enabled = msix_enabled(pdev); - if (kvm_msi_via_irqfd_enabled() && s->vm_id != -1) { + if (kvm_msi_via_irqfd_enabled()) { if (!was_enabled && is_enabled) { ivshmem_enable_irqfd(s); } else if (was_enabled && !is_enabled) { @@ -928,15 +964,16 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) * Receive setup messages from server synchronously. * Older versions did it asynchronously, but that creates a * number of entertaining race conditions. - * TODO Propagate errors! Without that, we still have races - * on errors. */ - ivshmem_recv_setup(s); - if (memory_region_is_mapped(&s->ivshmem)) { - qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, - ivshmem_read, NULL, s); + ivshmem_recv_setup(s, &err); + if (err) { + error_propagate(errp, err); + return; } + qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, + ivshmem_read, NULL, s); + if (ivshmem_setup_interrupts(s) < 0) { error_setg(errp, "failed to initialize interrupts"); return; diff --git a/qemu-doc.texi b/qemu-doc.texi index 65f3b29506..8afbbcdfea 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1289,14 +1289,7 @@ qemu-system-i386 -device ivshmem,size=@var{shm-size},vectors=@var{vectors},chard When using the server, the guest will be assigned a VM ID (>=0) that allows guests using the same server to communicate via interrupts. Guests can read their -VM ID from a device register (see example code). Since receiving the shared -memory region from the server is asynchronous, there is a (small) chance the -guest may boot before the shared memory is attached. To allow an application -to ensure shared memory is attached, the VM ID register will return -1 (an -invalid VM ID) until the memory is attached. Once the shared memory is -attached, the VM ID will return the guest's valid VM ID. With these semantics, -the guest application can check to ensure the shared memory is attached to the -guest before proceeding. +VM ID from a device register (see ivshmem-spec.txt). The @option{role} argument can be set to either master or peer and will affect how the shared memory is migrated. With @option{role=master}, the guest will -- cgit 1.4.1 From a3feb08639e7982f47c3981fea79d527d3dfc0ac Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:42 +0100 Subject: ivshmem: Rely on server sending the ID right after the version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The protocol specification (ivshmem-spec.txt, formerly ivshmem_device_spec.txt) has always required the ID message to be sent right at the beginning, and ivshmem-server has always complied. The device, however, accepts it out of order. If an interrupt setup arrived before it, though, it would be misinterpreted as connect notification. Fix the latent bug by relying on the spec and ivshmem-server's actual behavior. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-28-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 7f439c3b04..da32a74004 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -653,8 +653,6 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) if (fd >= 0) { process_msg_connect(s, msg, fd, errp); - } else if (s->vm_id == -1) { - s->vm_id = msg; } else { process_msg_disconnect(s, msg, errp); } @@ -721,6 +719,30 @@ static void ivshmem_recv_setup(IVShmemState *s, Error **errp) return; } + /* + * ivshmem-server sends the remaining initial messages in a fixed + * order, but the device has always accepted them in any order. + * Stay as compatible as practical, just in case people use + * servers that behave differently. + */ + + /* + * ivshmem_device_spec.txt has always required the ID message + * right here, and ivshmem-server has always complied. However, + * older versions of the device accepted it out of order, but + * broke when an interrupt setup message arrived before it. + */ + msg = ivshmem_recv_msg(s, &fd, &err); + if (err) { + error_propagate(errp, err); + return; + } + if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { + error_setg(errp, "server sent invalid ID message"); + return; + } + s->vm_id = msg; + /* * Receive more messages until we got shared memory. */ @@ -956,7 +978,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) /* we allocate enough space for 16 peers and grow as needed */ resize_peers(s, 16); - s->vm_id = -1; pci_register_bar(dev, 2, attr, &s->bar); -- cgit 1.4.1 From ba5970a178ef927c34cb8a6dfff54de0a58497c6 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:43 +0100 Subject: ivshmem: Drop the hackish test for UNIX domain chardev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chardev must be capable of transmitting SCM_RIGHTS ancillary messages. We check it by comparing CharDriverState member filename to "unix:". That's almost as brittle as it is disgusting. When the actual transmission all happened asynchronously, this check was all we could do in realize(), and thus better than nothing. But now we receive at least one SCM_RIGHTS synchronously in realize(), it's not worth its keep anymore. Drop it. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-29-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index da32a74004..c1a75db7c5 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -964,15 +964,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) memory_region_add_subregion(&s->bar, 0, mr); pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar); } else if (s->server_chr != NULL) { - /* FIXME do not rely on what chr drivers put into filename */ - if (strncmp(s->server_chr->filename, "unix:", 5)) { - error_setg(errp, "chardev is not a unix client socket"); - return; - } - - /* if we get a UNIX socket as the parameter we will talk - * to the ivshmem server to receive the memory region */ - IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", s->server_chr->filename); -- cgit 1.4.1 From ee276391a38c784fd1a3ce33eab0481348d518d1 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:44 +0100 Subject: ivshmem: Simplify how we cope with short reads from server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Short reads from a UNIX domain sockets are exceedingly unlikely when the other side always sends eight bytes and we always read eight bytes. We cope with them anyway. However, the code doing that is rather convoluted. Dumb it down radically. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-30-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 75 ++++++++++++------------------------------------------- 1 file changed, 16 insertions(+), 59 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index c1a75db7c5..7b9e76948a 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -26,7 +26,6 @@ #include "migration/migration.h" #include "qemu/error-report.h" #include "qemu/event_notifier.h" -#include "qemu/fifo8.h" #include "sysemu/char.h" #include "sysemu/hostmem.h" #include "qapi/visitor.h" @@ -80,7 +79,6 @@ typedef struct IVShmemState { uint32_t intrstatus; CharDriverState *server_chr; - Fifo8 incoming_fifo; MemoryRegion ivshmem_mmio; /* We might need to register the BAR before we actually have the memory. @@ -99,6 +97,8 @@ typedef struct IVShmemState { uint32_t vectors; uint32_t features; MSIVector *msi_vectors; + uint64_t msg_buf; /* buffer for receiving server messages */ + int msg_buffered_bytes; /* #bytes in @msg_buf */ Error *migration_blocker; @@ -255,11 +255,6 @@ static const MemoryRegionOps ivshmem_mmio_ops = { }, }; -static int ivshmem_can_receive(void * opaque) -{ - return sizeof(int64_t); -} - static void ivshmem_vector_notify(void *opaque) { MSIVector *entry = opaque; @@ -459,53 +454,6 @@ static void resize_peers(IVShmemState *s, int nb_peers) } } -static bool fifo_update_and_get(IVShmemState *s, const uint8_t *buf, int size, - void *data, size_t len) -{ - const uint8_t *p; - uint32_t num; - - assert(len <= sizeof(int64_t)); /* limitation of the fifo */ - if (fifo8_is_empty(&s->incoming_fifo) && size == len) { - memcpy(data, buf, size); - return true; - } - - IVSHMEM_DPRINTF("short read of %d bytes\n", size); - - num = MIN(size, sizeof(int64_t) - fifo8_num_used(&s->incoming_fifo)); - fifo8_push_all(&s->incoming_fifo, buf, num); - - if (fifo8_num_used(&s->incoming_fifo) < len) { - assert(num == 0); - return false; - } - - size -= num; - buf += num; - p = fifo8_pop_buf(&s->incoming_fifo, len, &num); - assert(num == len); - - memcpy(data, p, len); - - if (size > 0) { - fifo8_push_all(&s->incoming_fifo, buf, size); - } - - return true; -} - -static bool fifo_update_and_get_i64(IVShmemState *s, - const uint8_t *buf, int size, int64_t *i64) -{ - if (fifo_update_and_get(s, buf, size, i64, sizeof(*i64))) { - *i64 = GINT64_FROM_LE(*i64); - return true; - } - - return false; -} - static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, Error **errp) { @@ -658,6 +606,14 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) } } +static int ivshmem_can_receive(void *opaque) +{ + IVShmemState *s = opaque; + + assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); + return sizeof(s->msg_buf) - s->msg_buffered_bytes; +} + static void ivshmem_read(void *opaque, const uint8_t *buf, int size) { IVShmemState *s = opaque; @@ -665,9 +621,14 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size) int fd; int64_t msg; - if (!fifo_update_and_get_i64(s, buf, size, &msg)) { + assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); + memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); + s->msg_buffered_bytes += size; + if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { return; } + msg = le64_to_cpu(s->msg_buf); + s->msg_buffered_bytes = 0; fd = qemu_chr_fe_get_msgfd(s->server_chr); IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); @@ -1022,8 +983,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) } } - fifo8_create(&s->incoming_fifo, sizeof(int64_t)); - if (s->role_val == IVSHMEM_PEER) { error_setg(&s->migration_blocker, "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); @@ -1036,8 +995,6 @@ static void pci_ivshmem_exit(PCIDevice *dev) IVShmemState *s = IVSHMEM(dev); int i; - fifo8_destroy(&s->incoming_fifo); - if (s->migration_blocker) { migrate_del_blocker(s->migration_blocker); error_free(s->migration_blocker); -- cgit 1.4.1 From 08183c20b8b0782e4c30156eb7202d1117ca22f5 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:45 +0100 Subject: ivshmem: Tighten check of property "size" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If size_t is narrower than 64 bits, passing uint64_t ivshmem_size to mmap() truncates. Reject such sizes. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-31-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 7b9e76948a..66c713e7ab 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -87,7 +87,7 @@ typedef struct IVShmemState { */ MemoryRegion bar; MemoryRegion ivshmem; - uint64_t ivshmem_size; /* size of shared memory region */ + size_t ivshmem_size; /* size of shared memory region */ uint32_t ivshmem_64bit; Peer *peers; @@ -361,7 +361,7 @@ static int check_shm_size(IVShmemState *s, int fd, Error **errp) if (s->ivshmem_size > buf.st_size) { error_setg(errp, "Requested memory size greater" - " than shared object size (%" PRIu64 " > %" PRIu64")", + " than shared object size (%zu > %" PRIu64")", s->ivshmem_size, (uint64_t)buf.st_size); return -1; } else { @@ -865,7 +865,8 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) } else { char *end; int64_t size = qemu_strtosz(s->sizearg, &end); - if (size < 0 || *end != '\0' || !is_power_of_2(size)) { + if (size < 0 || (size_t)size != size || *end != '\0' + || !is_power_of_2(size)) { error_setg(errp, "Invalid size %s", s->sizearg); return; } -- cgit 1.4.1 From 5503e285041979dd29698ecb41729b3b22622e8d Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:46 +0100 Subject: ivshmem: Implement shm=... with a memory backend ivshmem has its very own code to create and map shared memory. Replace that with an implicitly created memory backend. Reduces the number of ways we create BAR 2 from three to two. The memory-backend-file is currently available only with CONFIG_LINUX, so this adds a second Linuxism to ivshmem (the other one is eventfd). Should we ever need to make it portable to systems where memory-backend-file can't be made to serve, we could create a memory-backend-shmem that allocates memory with shm_open(). Bonus fix: shared memory files are now created with permissions 0655 instead of 0777. Signed-off-by: Markus Armbruster Reviewed-by: Paolo Bonzini Message-Id: <1458066895-20632-32-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 79 ++++++++++++++++--------------------------------------- 1 file changed, 23 insertions(+), 56 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 66c713e7ab..138ae9d3bd 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -26,6 +26,7 @@ #include "migration/migration.h" #include "qemu/error-report.h" #include "qemu/event_notifier.h" +#include "qom/object_interfaces.h" #include "sysemu/char.h" #include "sysemu/hostmem.h" #include "qapi/visitor.h" @@ -369,31 +370,6 @@ static int check_shm_size(IVShmemState *s, int fd, Error **errp) } } -/* create the shared memory BAR when we are not using the server, so we can - * create the BAR and map the memory immediately */ -static int create_shared_memory_BAR(IVShmemState *s, int fd, uint8_t attr, - Error **errp) -{ - void * ptr; - - ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - error_setg_errno(errp, errno, "Failed to mmap shared memory"); - return -1; - } - - memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), "ivshmem.bar2", - s->ivshmem_size, ptr); - qemu_set_ram_fd(memory_region_get_ram_addr(&s->ivshmem), fd); - vmstate_register_ram(&s->ivshmem, DEVICE(s)); - memory_region_add_subregion(&s->bar, 0, &s->ivshmem); - - /* region for shared memory */ - pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar); - - return 0; -} - static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) { memory_region_add_eventfd(&s->ivshmem_mmio, @@ -837,6 +813,23 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, } } +static void desugar_shm(IVShmemState *s) +{ + Object *obj; + char *path; + + obj = object_new("memory-backend-file"); + path = g_strdup_printf("/dev/shm/%s", s->shmobj); + object_property_set_str(obj, path, "mem-path", &error_abort); + g_free(path); + object_property_set_int(obj, s->ivshmem_size, "size", &error_abort); + object_property_set_bool(obj, true, "share", &error_abort); + object_property_add_child(OBJECT(s), "internal-shm-backend", obj, + &error_abort); + user_creatable_complete(obj, &error_abort); + s->hostmem = MEMORY_BACKEND(obj); +} + static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) { IVShmemState *s = IVSHMEM(dev); @@ -915,6 +908,10 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; } + if (s->shmobj) { + desugar_shm(s); + } + if (s->hostmem != NULL) { MemoryRegion *mr; @@ -925,7 +922,7 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) vmstate_register_ram(mr, DEVICE(s)); memory_region_add_subregion(&s->bar, 0, mr); pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar); - } else if (s->server_chr != NULL) { + } else { IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", s->server_chr->filename); @@ -952,36 +949,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) error_setg(errp, "failed to initialize interrupts"); return; } - } else { - /* just map the file immediately, we're not using a server */ - int fd; - - IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj); - - /* try opening with O_EXCL and if it succeeds zero the memory - * by truncating to 0 */ - if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL, - S_IRWXU|S_IRWXG|S_IRWXO)) > 0) { - /* truncate file to length PCI device's memory */ - if (ftruncate(fd, s->ivshmem_size) != 0) { - error_report("could not truncate shared file"); - } - - } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR, - S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { - error_setg(errp, "could not open shared file"); - return; - } - - if (check_shm_size(s, fd, errp) == -1) { - return; - } - - create_shared_memory_BAR(s, fd, attr, &err); - if (err) { - error_propagate(errp, err); - return; - } } if (s->role_val == IVSHMEM_PEER) { -- cgit 1.4.1 From c2d8019cd7e9c38c5ef9dc44ae1e3adccd57a6a4 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:47 +0100 Subject: ivshmem: Simplify memory regions for BAR 2 (shared memory) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ivshmem_realize() puts the shared memory region in a container region. Used to be necessary to permit delayed mapping of the shared memory. However, we recently moved to synchronous mapping, in "ivshmem: Receive shared memory synchronously in realize()" and the commit following it. The container is redundant since then. Drop it. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Reviewed-by: Paolo Bonzini Message-Id: <1458066895-20632-33-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 47 +++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 30 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 138ae9d3bd..1b1de65974 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -82,12 +82,8 @@ typedef struct IVShmemState { CharDriverState *server_chr; MemoryRegion ivshmem_mmio; - /* We might need to register the BAR before we actually have the memory. - * So prepare a container MemoryRegion for the BAR immediately and - * add a subregion when we have the memory. - */ - MemoryRegion bar; - MemoryRegion ivshmem; + MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ + MemoryRegion server_bar2; /* used with server_chr */ size_t ivshmem_size; /* size of shared memory region */ uint32_t ivshmem_64bit; @@ -487,7 +483,7 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) Error *err = NULL; void *ptr; - if (memory_region_is_mapped(&s->ivshmem)) { + if (s->ivshmem_bar2) { error_setg(errp, "server sent unexpected shared memory message"); close(fd); return; @@ -506,11 +502,10 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) close(fd); return; } - memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), + memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s), "ivshmem.bar2", s->ivshmem_size, ptr); - qemu_set_ram_fd(memory_region_get_ram_addr(&s->ivshmem), fd); - vmstate_register_ram(&s->ivshmem, DEVICE(s)); - memory_region_add_subregion(&s->bar, 0, &s->ivshmem); + qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd); + s->ivshmem_bar2 = &s->server_bar2; } static void process_msg_disconnect(IVShmemState *s, uint16_t posn, @@ -702,7 +697,7 @@ static void ivshmem_recv_setup(IVShmemState *s, Error **errp) * successfully processed the server's shared memory message. * Assert that actually mapped the shared memory: */ - assert(memory_region_is_mapped(&s->ivshmem)); + assert(s->ivshmem_bar2); } /* Select the MSI-X vectors used by device. @@ -903,7 +898,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ivshmem_mmio); - memory_region_init(&s->bar, OBJECT(s), "ivshmem-bar2-container", s->ivshmem_size); if (s->ivshmem_64bit) { attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; } @@ -913,15 +907,10 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) } if (s->hostmem != NULL) { - MemoryRegion *mr; - IVSHMEM_DPRINTF("using hostmem\n"); - mr = host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem), - &error_abort); - vmstate_register_ram(mr, DEVICE(s)); - memory_region_add_subregion(&s->bar, 0, mr); - pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar); + s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem, + &error_abort); } else { IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", s->server_chr->filename); @@ -929,8 +918,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) /* we allocate enough space for 16 peers and grow as needed */ resize_peers(s, 16); - pci_register_bar(dev, 2, attr, &s->bar); - /* * Receive setup messages from server synchronously. * Older versions did it asynchronously, but that creates a @@ -951,6 +938,9 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) } } + vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); + pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2); + if (s->role_val == IVSHMEM_PEER) { error_setg(&s->migration_blocker, "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); @@ -968,9 +958,9 @@ static void pci_ivshmem_exit(PCIDevice *dev) error_free(s->migration_blocker); } - if (memory_region_is_mapped(&s->ivshmem)) { + if (memory_region_is_mapped(s->ivshmem_bar2)) { if (!s->hostmem) { - void *addr = memory_region_get_ram_ptr(&s->ivshmem); + void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); int fd; if (munmap(addr, s->ivshmem_size) == -1) { @@ -978,14 +968,11 @@ static void pci_ivshmem_exit(PCIDevice *dev) strerror(errno)); } - fd = qemu_get_ram_fd(memory_region_get_ram_addr(&s->ivshmem)); - if (fd != -1) { - close(fd); - } + fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2)); + close(fd); } - vmstate_unregister_ram(&s->ivshmem, DEVICE(dev)); - memory_region_del_subregion(&s->bar, &s->ivshmem); + vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); } if (s->peers) { -- cgit 1.4.1 From 8baeb22bfc3b57a8568c17b34c66ea2ff54df09a Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:48 +0100 Subject: ivshmem: Inline check_shm_size() into its only caller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve the error messages while there. Signed-off-by: Markus Armbruster Message-Id: <1458066895-20632-34-git-send-email-armbru@redhat.com> Reviewed-by: Marc-André Lureau --- hw/misc/ivshmem.c | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 1b1de65974..e6282ab6fc 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -343,29 +343,6 @@ static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, NULL, &s->msi_vectors[vector]); } -static int check_shm_size(IVShmemState *s, int fd, Error **errp) -{ - /* check that the guest isn't going to try and map more memory than the - * the object has allocated return -1 to indicate error */ - - struct stat buf; - - if (fstat(fd, &buf) < 0) { - error_setg(errp, "exiting: fstat on fd %d failed: %s", - fd, strerror(errno)); - return -1; - } - - if (s->ivshmem_size > buf.st_size) { - error_setg(errp, "Requested memory size greater" - " than shared object size (%zu > %" PRIu64")", - s->ivshmem_size, (uint64_t)buf.st_size); - return -1; - } else { - return 0; - } -} - static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) { memory_region_add_eventfd(&s->ivshmem_mmio, @@ -480,7 +457,7 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp) static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) { - Error *err = NULL; + struct stat buf; void *ptr; if (s->ivshmem_bar2) { @@ -489,8 +466,16 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) return; } - if (check_shm_size(s, fd, &err) == -1) { - error_propagate(errp, err); + if (fstat(fd, &buf) < 0) { + error_setg_errno(errp, errno, + "can't determine size of shared memory sent by server"); + close(fd); + return; + } + + if (s->ivshmem_size > buf.st_size) { + error_setg(errp, "server sent only %zd bytes of shared memory", + (size_t)buf.st_size); close(fd); return; } -- cgit 1.4.1 From 2a845da73653bf98a3187bceb40364d9565f70c7 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:50 +0100 Subject: ivshmem: Replace int role_val by OnOffAuto master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation of making it a qdev property. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-36-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index e6282ab6fc..f903fae5f9 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -43,9 +43,6 @@ #define IVSHMEM_IOEVENTFD 0 #define IVSHMEM_MSI 1 -#define IVSHMEM_PEER 0 -#define IVSHMEM_MASTER 1 - #define IVSHMEM_REG_BAR_SIZE 0x100 #define IVSHMEM_DEBUG 0 @@ -97,12 +94,12 @@ typedef struct IVShmemState { uint64_t msg_buf; /* buffer for receiving server messages */ int msg_buffered_bytes; /* #bytes in @msg_buf */ + OnOffAuto master; Error *migration_blocker; char * shmobj; char * sizearg; char * role; - int role_val; /* scalar to avoid multiple string comparisons */ } IVShmemState; /* registers for the Inter-VM shared memory device */ @@ -118,6 +115,12 @@ static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, return (ivs->features & (1 << feature)); } +static inline bool ivshmem_is_master(IVShmemState *s) +{ + assert(s->master != ON_OFF_AUTO_AUTO); + return s->master == ON_OFF_AUTO_ON; +} + static void ivshmem_update_irq(IVShmemState *s) { PCIDevice *d = PCI_DEVICE(s); @@ -856,15 +859,15 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) /* check that role is reasonable */ if (s->role) { if (strncmp(s->role, "peer", 5) == 0) { - s->role_val = IVSHMEM_PEER; + s->master = ON_OFF_AUTO_OFF; } else if (strncmp(s->role, "master", 7) == 0) { - s->role_val = IVSHMEM_MASTER; + s->master = ON_OFF_AUTO_ON; } else { error_setg(errp, "'role' must be 'peer' or 'master'"); return; } } else { - s->role_val = IVSHMEM_MASTER; /* default */ + s->master = ON_OFF_AUTO_AUTO; } pci_conf = dev->config; @@ -926,7 +929,11 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2); - if (s->role_val == IVSHMEM_PEER) { + if (s->master == ON_OFF_AUTO_AUTO) { + s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + } + + if (!ivshmem_is_master(s)) { error_setg(&s->migration_blocker, "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); migrate_add_blocker(s->migration_blocker); @@ -990,7 +997,7 @@ static int ivshmem_pre_load(void *opaque) { IVShmemState *s = opaque; - if (s->role_val == IVSHMEM_PEER) { + if (!ivshmem_is_master(s)) { error_report("'peer' devices are not migratable"); return -EINVAL; } @@ -1020,9 +1027,9 @@ static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) return -EINVAL; } - if (s->role_val == IVSHMEM_PEER) { - error_report("'peer' devices are not migratable"); - return -EINVAL; + ret = ivshmem_pre_load(s); + if (ret) { + return ret; } ret = pci_device_load(pdev, f); -- cgit 1.4.1 From 5400c02b90bb647a961f3210255178b68602bd5b Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:51 +0100 Subject: ivshmem: Split ivshmem-plain, ivshmem-doorbell off ivshmem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ivshmem can be configured with and without interrupt capability (a.k.a. "doorbell"). The two configurations have largely disjoint options, which makes for a confusing (and badly checked) user interface. Moreover, the device can't tell the guest whether its doorbell is enabled. Create two new device models ivshmem-plain and ivshmem-doorbell, and deprecate the old one. Changes from ivshmem: * PCI revision is 1 instead of 0. The new revision is fully backwards compatible for guests. Guests may elect to require at least revision 1 to make sure they're not exposed to the funny "no shared memory, yet" state. * Property "role" replaced by "master". role=master becomes master=on, role=peer becomes master=off. Default is off instead of auto. * Property "use64" is gone. The new devices always have 64 bit BARs. Changes from ivshmem to ivshmem-plain: * The Interrupt Pin register in PCI config space is zero (does not use an interrupt pin) instead of one (uses INTA). * Property "x-memdev" is renamed to "memdev". * Properties "shm" and "size" are gone. Use property "memdev" instead. * Property "msi" is gone. The new device can't have MSI-X capability. It can't interrupt anyway. * Properties "ioeventfd" and "vectors" are gone. They're meaningless without interrupts anyway. Changes from ivshmem to ivshmem-doorbell: * Property "msi" is gone. The new device always has MSI-X capability. * Property "ioeventfd" defaults to on instead of off. * Property "size" is gone. The new device can only map all the shared memory received from the server. Guests can easily find out whether the device is configured for interrupts by checking for MSI-X capability. Note: some code added in sub-optimal places to make the diff easier to review. The next commit will move it to more sensible places. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-37-git-send-email-armbru@redhat.com> --- docs/specs/ivshmem-spec.txt | 66 ++++----- hw/misc/ivshmem.c | 329 ++++++++++++++++++++++++++++++++------------ qemu-doc.texi | 33 +++-- tests/ivshmem-test.c | 12 +- 4 files changed, 304 insertions(+), 136 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt index 4c33973552..f3912c0565 100644 --- a/docs/specs/ivshmem-spec.txt +++ b/docs/specs/ivshmem-spec.txt @@ -17,9 +17,10 @@ get interrupted by its peers. There are two basic configurations: -- Just shared memory: -device ivshmem,shm=NAME,... +- Just shared memory: -device ivshmem-plain,memdev=HMB,... - This uses shared memory object NAME. + This uses host memory backend HMB. It should have option "share" + set. - Shared memory plus interrupts: -device ivshmem,chardev=CHR,vectors=N,... @@ -30,9 +31,8 @@ There are two basic configurations: Each peer gets assigned a unique ID by the server. IDs must be between 0 and 65535. - Interrupts are message-signaled by default (MSI-X). With msi=off - the device has no MSI-X capability, and uses legacy INTx instead. - vectors=N configures the number of vectors to use. + Interrupts are message-signaled (MSI-X). vectors=N configures the + number of vectors to use. For more details on ivshmem device properties, see The QEMU Emulator User Documentation (qemu-doc.*). @@ -40,14 +40,15 @@ User Documentation (qemu-doc.*). == The ivshmem PCI device's guest interface == -The device has vendor ID 1af4, device ID 1110, revision 0. +The device has vendor ID 1af4, device ID 1110, revision 1. Before +QEMU 2.6.0, it had revision 0. === PCI BARs === The ivshmem PCI device has two or three BARs: - BAR0 holds device registers (256 Byte MMIO) -- BAR1 holds MSI-X table and PBA (only when using MSI-X) +- BAR1 holds MSI-X table and PBA (only ivshmem-doorbell) - BAR2 maps the shared memory object There are two ways to use this device: @@ -58,18 +59,19 @@ There are two ways to use this device: user space (see http://dpdk.org/browse/memnic). - If you additionally need the capability for peers to interrupt each - other, you need BAR0 and, if using MSI-X, BAR1. You will most - likely want to write a kernel driver to handle interrupts. Requires - the device to be configured for interrupts, obviously. + other, you need BAR0 and BAR1. You will most likely want to write a + kernel driver to handle interrupts. Requires the device to be + configured for interrupts, obviously. Before QEMU 2.6.0, BAR2 can initially be invalid if the device is configured for interrupts. It becomes safely accessible only after -the ivshmem server provided the shared memory. Guest software should -wait for the IVPosition register (described below) to become -non-negative before accessing BAR2. +the ivshmem server provided the shared memory. These devices have PCI +revision 0 rather than 1. Guest software should wait for the +IVPosition register (described below) to become non-negative before +accessing BAR2. -The device is not capable to tell guest software whether it is -configured for interrupts. +Revision 0 of the device is not capable to tell guest software whether +it is configured for interrupts. === PCI device registers === @@ -77,10 +79,12 @@ BAR 0 contains the following registers: Offset Size Access On reset Function 0 4 read/write 0 Interrupt Mask - bit 0: peer interrupt + bit 0: peer interrupt (rev 0) + reserved (rev 1) bit 1..31: reserved 4 4 read/write 0 Interrupt Status - bit 0: peer interrupt + bit 0: peer interrupt (rev 0) + reserved (rev 1) bit 1..31: reserved 8 4 read-only 0 or ID IVPosition 12 4 write-only N/A Doorbell @@ -92,18 +96,18 @@ Software should only access the registers as specified in column "Access". Reserved bits should be ignored on read, and preserved on write. -Interrupt Status and Mask Register together control the legacy INTx -interrupt when the device has no MSI-X capability: INTx is asserted -when the bit-wise AND of Status and Mask is non-zero and the device -has no MSI-X capability. Interrupt Status Register bit 0 becomes 1 -when an interrupt request from a peer is received. Reading the -register clears it. +In revision 0 of the device, Interrupt Status and Mask Register +together control the legacy INTx interrupt when the device has no +MSI-X capability: INTx is asserted when the bit-wise AND of Status and +Mask is non-zero and the device has no MSI-X capability. Interrupt +Status Register bit 0 becomes 1 when an interrupt request from a peer +is received. Reading the register clears it. IVPosition Register: if the device is not configured for interrupts, this is zero. Else, it is the device's ID (between 0 and 65535). Before QEMU 2.6.0, the register may read -1 for a short while after -reset. +reset. These devices have PCI revision 0 rather than 1. There is no good way for software to find out whether the device is configured for interrupts. A positive IVPosition means interrupts, @@ -124,14 +128,14 @@ interrupt vectors connected, the write is ignored. The device is not capable to tell guest software what peers are connected, or how many interrupt vectors are connected. -If the peer doesn't use MSI-X, its Interrupt Status register is set to -1. This asserts INTx unless masked by the Interrupt Mask register. -The device is not capable to communicate the interrupt vector to guest -software then. +The peer's interrupt for this vector then becomes pending. There is +no way for software to clear the pending bit, and a polling mode of +operation is therefore impossible. -If the peer uses MSI-X, the interrupt for this vector becomes pending. -There is no way for software to clear the pending bit, and a polling -mode of operation is therefore impossible with MSI-X. +If the peer is a revision 0 device without MSI-X capability, its +Interrupt Status register is set to 1. This asserts INTx unless +masked by the Interrupt Mask register. The device is not capable to +communicate the interrupt vector to guest software then. With multiple MSI-X vectors, different vectors can be used to indicate different events have occurred. The semantics of interrupt vectors diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index f903fae5f9..89076e4ba3 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -29,6 +29,7 @@ #include "qom/object_interfaces.h" #include "sysemu/char.h" #include "sysemu/hostmem.h" +#include "sysemu/qtest.h" #include "qapi/visitor.h" #include "exec/ram_addr.h" @@ -53,6 +54,18 @@ } \ } while (0) +#define TYPE_IVSHMEM_COMMON "ivshmem-common" +#define IVSHMEM_COMMON(obj) \ + OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) + +#define TYPE_IVSHMEM_PLAIN "ivshmem-plain" +#define IVSHMEM_PLAIN(obj) \ + OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) + +#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" +#define IVSHMEM_DOORBELL(obj) \ + OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) + #define TYPE_IVSHMEM "ivshmem" #define IVSHMEM(obj) \ OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) @@ -81,8 +94,6 @@ typedef struct IVShmemState { MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ MemoryRegion server_bar2; /* used with server_chr */ - size_t ivshmem_size; /* size of shared memory region */ - uint32_t ivshmem_64bit; Peer *peers; int nb_peers; /* space in @peers[] */ @@ -97,9 +108,12 @@ typedef struct IVShmemState { OnOffAuto master; Error *migration_blocker; - char * shmobj; - char * sizearg; - char * role; + /* legacy cruft */ + char *role; + char *shmobj; + char *sizearg; + size_t legacy_size; + uint32_t not_legacy_32bit; } IVShmemState; /* registers for the Inter-VM shared memory device */ @@ -126,8 +140,21 @@ static void ivshmem_update_irq(IVShmemState *s) PCIDevice *d = PCI_DEVICE(s); uint32_t isr = s->intrstatus & s->intrmask; - /* No INTx with msi=on, whether the guest enabled MSI-X or not */ - if (ivshmem_has_feature(s, IVSHMEM_MSI)) { + /* + * Do nothing unless the device actually uses INTx. Here's how + * the device variants signal interrupts, what they put in PCI + * config space: + * Device variant Interrupt Interrupt Pin MSI-X cap. + * ivshmem-plain none 0 no + * ivshmem-doorbell MSI-X 1 yes(1) + * ivshmem,msi=off INTx 1 no + * ivshmem,msi=on MSI-X 1(2) yes(1) + * (1) if guest enabled MSI-X + * (2) the device lies + * Leads to the condition for doing nothing: + */ + if (ivshmem_has_feature(s, IVSHMEM_MSI) + || !d->config[PCI_INTERRUPT_PIN]) { return; } @@ -259,7 +286,7 @@ static void ivshmem_vector_notify(void *opaque) { MSIVector *entry = opaque; PCIDevice *pdev = entry->pdev; - IVShmemState *s = IVSHMEM(pdev); + IVShmemState *s = IVSHMEM_COMMON(pdev); int vector = entry - s->msi_vectors; EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; @@ -280,7 +307,7 @@ static void ivshmem_vector_notify(void *opaque) static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, MSIMessage msg) { - IVShmemState *s = IVSHMEM(dev); + IVShmemState *s = IVSHMEM_COMMON(dev); EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; MSIVector *v = &s->msi_vectors[vector]; int ret; @@ -297,7 +324,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) { - IVShmemState *s = IVSHMEM(dev); + IVShmemState *s = IVSHMEM_COMMON(dev); EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; int ret; @@ -314,7 +341,7 @@ static void ivshmem_vector_poll(PCIDevice *dev, unsigned int vector_start, unsigned int vector_end) { - IVShmemState *s = IVSHMEM(dev); + IVShmemState *s = IVSHMEM_COMMON(dev); unsigned int vector; IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); @@ -461,6 +488,7 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp) static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) { struct stat buf; + size_t size; void *ptr; if (s->ivshmem_bar2) { @@ -476,22 +504,28 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) return; } - if (s->ivshmem_size > buf.st_size) { - error_setg(errp, "server sent only %zd bytes of shared memory", - (size_t)buf.st_size); - close(fd); - return; + size = buf.st_size; + + /* Legacy cruft */ + if (s->legacy_size != SIZE_MAX) { + if (size < s->legacy_size) { + error_setg(errp, "server sent only %zd bytes of shared memory", + (size_t)buf.st_size); + close(fd); + return; + } + size = s->legacy_size; } /* mmap the region and map into the BAR2 */ - ptr = mmap(0, s->ivshmem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (ptr == MAP_FAILED) { error_setg_errno(errp, errno, "Failed to mmap shared memory"); close(fd); return; } memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s), - "ivshmem.bar2", s->ivshmem_size, ptr); + "ivshmem.bar2", size, ptr); qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd); s->ivshmem_bar2 = &s->server_bar2; } @@ -703,7 +737,7 @@ static void ivshmem_msix_vector_use(IVShmemState *s) static void ivshmem_reset(DeviceState *d) { - IVShmemState *s = IVSHMEM(d); + IVShmemState *s = IVSHMEM_COMMON(d); s->intrstatus = 0; s->intrmask = 0; @@ -781,7 +815,7 @@ static void ivshmem_disable_irqfd(IVShmemState *s) static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, uint32_t val, int len) { - IVShmemState *s = IVSHMEM(pdev); + IVShmemState *s = IVSHMEM_COMMON(pdev); int is_enabled, was_enabled = msix_enabled(pdev); pci_default_write_config(pdev, address, val, len); @@ -805,7 +839,7 @@ static void desugar_shm(IVShmemState *s) path = g_strdup_printf("/dev/shm/%s", s->shmobj); object_property_set_str(obj, path, "mem-path", &error_abort); g_free(path); - object_property_set_int(obj, s->ivshmem_size, "size", &error_abort); + object_property_set_int(obj, s->legacy_size, "size", &error_abort); object_property_set_bool(obj, true, "share", &error_abort); object_property_add_child(OBJECT(s), "internal-shm-backend", obj, &error_abort); @@ -813,42 +847,14 @@ static void desugar_shm(IVShmemState *s) s->hostmem = MEMORY_BACKEND(obj); } -static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) +static void ivshmem_common_realize(PCIDevice *dev, Error **errp) { - IVShmemState *s = IVSHMEM(dev); + IVShmemState *s = IVSHMEM_COMMON(dev); Error *err = NULL; uint8_t *pci_conf; uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH; - if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) { - error_setg(errp, - "You must specify either 'shm', 'chardev' or 'x-memdev'"); - return; - } - - if (s->hostmem) { - MemoryRegion *mr; - - if (s->sizearg) { - g_warning("size argument ignored with hostmem"); - } - - mr = host_memory_backend_get_memory(s->hostmem, &error_abort); - s->ivshmem_size = memory_region_size(mr); - } else if (s->sizearg == NULL) { - s->ivshmem_size = 4 << 20; /* 4 MB default */ - } else { - char *end; - int64_t size = qemu_strtosz(s->sizearg, &end); - if (size < 0 || (size_t)size != size || *end != '\0' - || !is_power_of_2(size)) { - error_setg(errp, "Invalid size %s", s->sizearg); - return; - } - s->ivshmem_size = size; - } - /* IRQFD requires MSI */ if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && !ivshmem_has_feature(s, IVSHMEM_MSI)) { @@ -856,29 +862,9 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) return; } - /* check that role is reasonable */ - if (s->role) { - if (strncmp(s->role, "peer", 5) == 0) { - s->master = ON_OFF_AUTO_OFF; - } else if (strncmp(s->role, "master", 7) == 0) { - s->master = ON_OFF_AUTO_ON; - } else { - error_setg(errp, "'role' must be 'peer' or 'master'"); - return; - } - } else { - s->master = ON_OFF_AUTO_AUTO; - } - pci_conf = dev->config; pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; - /* - * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a - * bald-faced lie then. But it's a backwards compatible lie. - */ - pci_config_set_interrupt_pin(pci_conf, 1); - memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); @@ -886,14 +872,10 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ivshmem_mmio); - if (s->ivshmem_64bit) { + if (!s->not_legacy_32bit) { attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; } - if (s->shmobj) { - desugar_shm(s); - } - if (s->hostmem != NULL) { IVSHMEM_DPRINTF("using hostmem\n"); @@ -940,9 +922,68 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp) } } -static void pci_ivshmem_exit(PCIDevice *dev) +static void ivshmem_realize(PCIDevice *dev, Error **errp) { - IVShmemState *s = IVSHMEM(dev); + IVShmemState *s = IVSHMEM_COMMON(dev); + + if (!qtest_enabled()) { + error_report("ivshmem is deprecated, please use ivshmem-plain" + " or ivshmem-doorbell instead"); + } + + if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) { + error_setg(errp, + "You must specify either 'shm', 'chardev' or 'x-memdev'"); + return; + } + + if (s->hostmem) { + if (s->sizearg) { + g_warning("size argument ignored with hostmem"); + } + } else if (s->sizearg == NULL) { + s->legacy_size = 4 << 20; /* 4 MB default */ + } else { + char *end; + int64_t size = qemu_strtosz(s->sizearg, &end); + if (size < 0 || (size_t)size != size || *end != '\0' + || !is_power_of_2(size)) { + error_setg(errp, "Invalid size %s", s->sizearg); + return; + } + s->legacy_size = size; + } + + /* check that role is reasonable */ + if (s->role) { + if (strncmp(s->role, "peer", 5) == 0) { + s->master = ON_OFF_AUTO_OFF; + } else if (strncmp(s->role, "master", 7) == 0) { + s->master = ON_OFF_AUTO_ON; + } else { + error_setg(errp, "'role' must be 'peer' or 'master'"); + return; + } + } else { + s->master = ON_OFF_AUTO_AUTO; + } + + if (s->shmobj) { + desugar_shm(s); + } + + /* + * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a + * bald-faced lie then. But it's a backwards compatible lie. + */ + pci_config_set_interrupt_pin(dev->config, 1); + + ivshmem_common_realize(dev, errp); +} + +static void ivshmem_exit(PCIDevice *dev) +{ + IVShmemState *s = IVSHMEM_COMMON(dev); int i; if (s->migration_blocker) { @@ -955,7 +996,7 @@ static void pci_ivshmem_exit(PCIDevice *dev) void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); int fd; - if (munmap(addr, s->ivshmem_size) == -1) { + if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { error_report("Failed to munmap shared memory %s", strerror(errno)); } @@ -1075,28 +1116,39 @@ static Property ivshmem_properties[] = { DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), DEFINE_PROP_STRING("shm", IVShmemState, shmobj), DEFINE_PROP_STRING("role", IVShmemState, role), - DEFINE_PROP_UINT32("use64", IVShmemState, ivshmem_64bit, 1), + DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), DEFINE_PROP_END_OF_LIST(), }; -static void ivshmem_class_init(ObjectClass *klass, void *data) +static void ivshmem_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->realize = pci_ivshmem_realize; - k->exit = pci_ivshmem_exit; + k->realize = ivshmem_common_realize; + k->exit = ivshmem_exit; k->config_write = ivshmem_write_config; k->vendor_id = PCI_VENDOR_ID_IVSHMEM; k->device_id = PCI_DEVICE_ID_IVSHMEM; k->class_id = PCI_CLASS_MEMORY_RAM; + k->revision = 1; dc->reset = ivshmem_reset; - dc->props = ivshmem_properties; - dc->vmsd = &ivshmem_vmsd; set_bit(DEVICE_CATEGORY_MISC, dc->categories); dc->desc = "Inter-VM shared memory"; } +static void ivshmem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = ivshmem_realize; + k->revision = 0; + dc->desc = "Inter-VM shared memory (legacy)"; + dc->props = ivshmem_properties; + dc->vmsd = &ivshmem_vmsd; +} + static void ivshmem_check_memdev_is_busy(Object *obj, const char *name, Object *val, Error **errp) { @@ -1123,16 +1175,121 @@ static void ivshmem_init(Object *obj) &error_abort); } +static const TypeInfo ivshmem_common_info = { + .name = TYPE_IVSHMEM_COMMON, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(IVShmemState), + .abstract = true, + .class_init = ivshmem_common_class_init, +}; + static const TypeInfo ivshmem_info = { .name = TYPE_IVSHMEM, - .parent = TYPE_PCI_DEVICE, + .parent = TYPE_IVSHMEM_COMMON, .instance_size = sizeof(IVShmemState), .instance_init = ivshmem_init, .class_init = ivshmem_class_init, }; +static const VMStateDescription ivshmem_plain_vmsd = { + .name = TYPE_IVSHMEM_PLAIN, + .version_id = 0, + .minimum_version_id = 0, + .pre_load = ivshmem_pre_load, + .post_load = ivshmem_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), + VMSTATE_UINT32(intrstatus, IVShmemState), + VMSTATE_UINT32(intrmask, IVShmemState), + VMSTATE_END_OF_LIST() + }, +}; + +static Property ivshmem_plain_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ivshmem_plain_init(Object *obj) +{ + IVShmemState *s = IVSHMEM_PLAIN(obj); + + object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND, + (Object **)&s->hostmem, + ivshmem_check_memdev_is_busy, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); +} + +static void ivshmem_plain_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = ivshmem_plain_properties; + dc->vmsd = &ivshmem_plain_vmsd; +} + +static const TypeInfo ivshmem_plain_info = { + .name = TYPE_IVSHMEM_PLAIN, + .parent = TYPE_IVSHMEM_COMMON, + .instance_size = sizeof(IVShmemState), + .instance_init = ivshmem_plain_init, + .class_init = ivshmem_plain_class_init, +}; + +static const VMStateDescription ivshmem_doorbell_vmsd = { + .name = TYPE_IVSHMEM_DOORBELL, + .version_id = 0, + .minimum_version_id = 0, + .pre_load = ivshmem_pre_load, + .post_load = ivshmem_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), + VMSTATE_MSIX(parent_obj, IVShmemState), + VMSTATE_UINT32(intrstatus, IVShmemState), + VMSTATE_UINT32(intrmask, IVShmemState), + VMSTATE_END_OF_LIST() + }, +}; + +static Property ivshmem_doorbell_properties[] = { + DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), + DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), + DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, + true), + DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ivshmem_doorbell_init(Object *obj) +{ + IVShmemState *s = IVSHMEM_DOORBELL(obj); + + s->features |= (1 << IVSHMEM_MSI); + s->legacy_size = SIZE_MAX; /* whatever the server sends */ +} + +static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = ivshmem_doorbell_properties; + dc->vmsd = &ivshmem_doorbell_vmsd; +} + +static const TypeInfo ivshmem_doorbell_info = { + .name = TYPE_IVSHMEM_DOORBELL, + .parent = TYPE_IVSHMEM_COMMON, + .instance_size = sizeof(IVShmemState), + .instance_init = ivshmem_doorbell_init, + .class_init = ivshmem_doorbell_class_init, +}; + static void ivshmem_register_types(void) { + type_register_static(&ivshmem_common_info); + type_register_static(&ivshmem_plain_info); + type_register_static(&ivshmem_doorbell_info); type_register_static(&ivshmem_info); } diff --git a/qemu-doc.texi b/qemu-doc.texi index 8afbbcdfea..0dd01c78fd 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1262,13 +1262,18 @@ basic example. @subsection Inter-VM Shared Memory device -With KVM enabled on a Linux host, a shared memory device is available. Guests -map a POSIX shared memory region into the guest as a PCI device that enables -zero-copy communication to the application level of the guests. The basic -syntax is: +On Linux hosts, a shared memory device is available. The basic syntax +is: @example -qemu-system-i386 -device ivshmem,size=@var{size},shm=@var{shm-name} +qemu-system-x86_64 -device ivshmem-plain,memdev=@var{hostmem} +@end example + +where @var{hostmem} names a host memory backend. For a POSIX shared +memory backend, use something like + +@example +-object memory-backend-file,size=1M,share,mem-path=/dev/shm/ivshmem,id=@var{hostmem} @end example If desired, interrupts can be sent between guest VMs accessing the same shared @@ -1282,8 +1287,7 @@ memory server is: ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} # Then start your qemu instances with matching arguments -qemu-system-i386 -device ivshmem,size=@var{shm-size},vectors=@var{vectors},chardev=@var{id} - [,msi=on][,ioeventfd=on][,role=peer|master] +qemu-system-x86_64 -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} -chardev socket,path=@var{path},id=@var{id} @end example @@ -1291,12 +1295,11 @@ When using the server, the guest will be assigned a VM ID (>=0) that allows gues using the same server to communicate via interrupts. Guests can read their VM ID from a device register (see ivshmem-spec.txt). -The @option{role} argument can be set to either master or peer and will affect -how the shared memory is migrated. With @option{role=master}, the guest will -copy the shared memory on migration to the destination host. With -@option{role=peer}, the guest will not be able to migrate with the device attached. -With the @option{peer} case, the device should be detached and then reattached -after migration using the PCI hotplug support. +With device property @option{master=on}, the guest will copy the shared +memory on migration to the destination host. With @option{master=off}, +the guest will not be able to migrate with the device attached. In the +latter case, the device should be detached and then reattached after +migration using the PCI hotplug support. @subsubsection ivshmem and hugepages @@ -1304,8 +1307,8 @@ Instead of specifying the using POSIX shm, you may specify a memory backend that has hugepage support: @example -qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 - -device ivshmem,x-memdev=mb1 +qemu-system-x86_64 -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 + -device ivshmem-plain,memdev=mb1 @end example ivshmem-server also supports hugepages mount points with the diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c index c7f3758df0..c027ff1e09 100644 --- a/tests/ivshmem-test.c +++ b/tests/ivshmem-test.c @@ -127,7 +127,9 @@ static void setup_vm_cmd(IVState *s, const char *cmd, bool msix) static void setup_vm(IVState *s) { - char *cmd = g_strdup_printf("-device ivshmem,shm=%s,size=1M", tmpshm); + char *cmd = g_strdup_printf("-object memory-backend-file" + ",id=mb1,size=1M,share,mem-path=/dev/shm%s" + " -device ivshmem-plain,memdev=mb1", tmpshm); setup_vm_cmd(s, cmd, false); @@ -284,8 +286,10 @@ static void *server_thread(void *data) static void setup_vm_with_server(IVState *s, int nvectors, bool msi) { char *cmd = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait " - "-device ivshmem,size=1M,chardev=chr0,vectors=%d,msi=%s", - tmpserver, nvectors, msi ? "true" : "false"); + "-device ivshmem%s,chardev=chr0,vectors=%d", + tmpserver, + msi ? "-doorbell" : ",size=1M,msi=off", + nvectors); setup_vm_cmd(s, cmd, msi); @@ -412,7 +416,7 @@ static void test_ivshmem_memdev(void) /* just for the sake of checking memory-backend property */ setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1" - " -device ivshmem,x-memdev=mb1", false); + " -device ivshmem-plain,memdev=mb1", false); cleanup_vm(&state); } -- cgit 1.4.1 From ddc852844388bb43a39319f0be8a4c4af3a4c526 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:52 +0100 Subject: ivshmem: Clean up after the previous commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move code to more sensible places. Use the opportunity to reorder and document IVShmemState members. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-38-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 400 +++++++++++++++++++++++++++--------------------------- 1 file changed, 203 insertions(+), 197 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 89076e4ba3..527d6362dd 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -85,26 +85,31 @@ typedef struct IVShmemState { PCIDevice parent_obj; /*< public >*/ - HostMemoryBackend *hostmem; + uint32_t features; + + /* exactly one of these two may be set */ + HostMemoryBackend *hostmem; /* with interrupts */ + CharDriverState *server_chr; /* without interrupts */ + + /* registers */ uint32_t intrmask; uint32_t intrstatus; + int vm_id; - CharDriverState *server_chr; - MemoryRegion ivshmem_mmio; - + /* BARs */ + MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ MemoryRegion server_bar2; /* used with server_chr */ + /* interrupt support */ Peer *peers; int nb_peers; /* space in @peers[] */ - - int vm_id; uint32_t vectors; - uint32_t features; MSIVector *msi_vectors; uint64_t msg_buf; /* buffer for receiving server messages */ int msg_buffered_bytes; /* #bytes in @msg_buf */ + /* migration stuff */ OnOffAuto master; Error *migration_blocker; @@ -830,23 +835,6 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, } } -static void desugar_shm(IVShmemState *s) -{ - Object *obj; - char *path; - - obj = object_new("memory-backend-file"); - path = g_strdup_printf("/dev/shm/%s", s->shmobj); - object_property_set_str(obj, path, "mem-path", &error_abort); - g_free(path); - object_property_set_int(obj, s->legacy_size, "size", &error_abort); - object_property_set_bool(obj, true, "share", &error_abort); - object_property_add_child(OBJECT(s), "internal-shm-backend", obj, - &error_abort); - user_creatable_complete(obj, &error_abort); - s->hostmem = MEMORY_BACKEND(obj); -} - static void ivshmem_common_realize(PCIDevice *dev, Error **errp) { IVShmemState *s = IVSHMEM_COMMON(dev); @@ -922,65 +910,6 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) } } -static void ivshmem_realize(PCIDevice *dev, Error **errp) -{ - IVShmemState *s = IVSHMEM_COMMON(dev); - - if (!qtest_enabled()) { - error_report("ivshmem is deprecated, please use ivshmem-plain" - " or ivshmem-doorbell instead"); - } - - if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) { - error_setg(errp, - "You must specify either 'shm', 'chardev' or 'x-memdev'"); - return; - } - - if (s->hostmem) { - if (s->sizearg) { - g_warning("size argument ignored with hostmem"); - } - } else if (s->sizearg == NULL) { - s->legacy_size = 4 << 20; /* 4 MB default */ - } else { - char *end; - int64_t size = qemu_strtosz(s->sizearg, &end); - if (size < 0 || (size_t)size != size || *end != '\0' - || !is_power_of_2(size)) { - error_setg(errp, "Invalid size %s", s->sizearg); - return; - } - s->legacy_size = size; - } - - /* check that role is reasonable */ - if (s->role) { - if (strncmp(s->role, "peer", 5) == 0) { - s->master = ON_OFF_AUTO_OFF; - } else if (strncmp(s->role, "master", 7) == 0) { - s->master = ON_OFF_AUTO_ON; - } else { - error_setg(errp, "'role' must be 'peer' or 'master'"); - return; - } - } else { - s->master = ON_OFF_AUTO_AUTO; - } - - if (s->shmobj) { - desugar_shm(s); - } - - /* - * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a - * bald-faced lie then. But it's a backwards compatible lie. - */ - pci_config_set_interrupt_pin(dev->config, 1); - - ivshmem_common_realize(dev, errp); -} - static void ivshmem_exit(PCIDevice *dev) { IVShmemState *s = IVSHMEM_COMMON(dev); @@ -1022,18 +951,6 @@ static void ivshmem_exit(PCIDevice *dev) g_free(s->msi_vectors); } -static bool test_msix(void *opaque, int version_id) -{ - IVShmemState *s = opaque; - - return ivshmem_has_feature(s, IVSHMEM_MSI); -} - -static bool test_no_msix(void *opaque, int version_id) -{ - return !test_msix(opaque, version_id); -} - static int ivshmem_pre_load(void *opaque) { IVShmemState *s = opaque; @@ -1056,70 +973,6 @@ static int ivshmem_post_load(void *opaque, int version_id) return 0; } -static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) -{ - IVShmemState *s = opaque; - PCIDevice *pdev = PCI_DEVICE(s); - int ret; - - IVSHMEM_DPRINTF("ivshmem_load_old\n"); - - if (version_id != 0) { - return -EINVAL; - } - - ret = ivshmem_pre_load(s); - if (ret) { - return ret; - } - - ret = pci_device_load(pdev, f); - if (ret) { - return ret; - } - - if (ivshmem_has_feature(s, IVSHMEM_MSI)) { - msix_load(pdev, f); - ivshmem_msix_vector_use(s); - } else { - s->intrstatus = qemu_get_be32(f); - s->intrmask = qemu_get_be32(f); - } - - return 0; -} - -static const VMStateDescription ivshmem_vmsd = { - .name = "ivshmem", - .version_id = 1, - .minimum_version_id = 1, - .pre_load = ivshmem_pre_load, - .post_load = ivshmem_post_load, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), - - VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix), - VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix), - VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix), - - VMSTATE_END_OF_LIST() - }, - .load_state_old = ivshmem_load_old, - .minimum_version_id_old = 0 -}; - -static Property ivshmem_properties[] = { - DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), - DEFINE_PROP_STRING("size", IVShmemState, sizearg), - DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), - DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false), - DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), - DEFINE_PROP_STRING("shm", IVShmemState, shmobj), - DEFINE_PROP_STRING("role", IVShmemState, role), - DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), - DEFINE_PROP_END_OF_LIST(), -}; - static void ivshmem_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1137,17 +990,13 @@ static void ivshmem_common_class_init(ObjectClass *klass, void *data) dc->desc = "Inter-VM shared memory"; } -static void ivshmem_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - - k->realize = ivshmem_realize; - k->revision = 0; - dc->desc = "Inter-VM shared memory (legacy)"; - dc->props = ivshmem_properties; - dc->vmsd = &ivshmem_vmsd; -} +static const TypeInfo ivshmem_common_info = { + .name = TYPE_IVSHMEM_COMMON, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(IVShmemState), + .abstract = true, + .class_init = ivshmem_common_class_init, +}; static void ivshmem_check_memdev_is_busy(Object *obj, const char *name, Object *val, Error **errp) @@ -1164,33 +1013,6 @@ static void ivshmem_check_memdev_is_busy(Object *obj, const char *name, } } -static void ivshmem_init(Object *obj) -{ - IVShmemState *s = IVSHMEM(obj); - - object_property_add_link(obj, "x-memdev", TYPE_MEMORY_BACKEND, - (Object **)&s->hostmem, - ivshmem_check_memdev_is_busy, - OBJ_PROP_LINK_UNREF_ON_RELEASE, - &error_abort); -} - -static const TypeInfo ivshmem_common_info = { - .name = TYPE_IVSHMEM_COMMON, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(IVShmemState), - .abstract = true, - .class_init = ivshmem_common_class_init, -}; - -static const TypeInfo ivshmem_info = { - .name = TYPE_IVSHMEM, - .parent = TYPE_IVSHMEM_COMMON, - .instance_size = sizeof(IVShmemState), - .instance_init = ivshmem_init, - .class_init = ivshmem_class_init, -}; - static const VMStateDescription ivshmem_plain_vmsd = { .name = TYPE_IVSHMEM_PLAIN, .version_id = 0, @@ -1285,6 +1107,190 @@ static const TypeInfo ivshmem_doorbell_info = { .class_init = ivshmem_doorbell_class_init, }; +static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) +{ + IVShmemState *s = opaque; + PCIDevice *pdev = PCI_DEVICE(s); + int ret; + + IVSHMEM_DPRINTF("ivshmem_load_old\n"); + + if (version_id != 0) { + return -EINVAL; + } + + ret = ivshmem_pre_load(s); + if (ret) { + return ret; + } + + ret = pci_device_load(pdev, f); + if (ret) { + return ret; + } + + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { + msix_load(pdev, f); + ivshmem_msix_vector_use(s); + } else { + s->intrstatus = qemu_get_be32(f); + s->intrmask = qemu_get_be32(f); + } + + return 0; +} + +static bool test_msix(void *opaque, int version_id) +{ + IVShmemState *s = opaque; + + return ivshmem_has_feature(s, IVSHMEM_MSI); +} + +static bool test_no_msix(void *opaque, int version_id) +{ + return !test_msix(opaque, version_id); +} + +static const VMStateDescription ivshmem_vmsd = { + .name = "ivshmem", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = ivshmem_pre_load, + .post_load = ivshmem_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), + + VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix), + VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix), + VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix), + + VMSTATE_END_OF_LIST() + }, + .load_state_old = ivshmem_load_old, + .minimum_version_id_old = 0 +}; + +static Property ivshmem_properties[] = { + DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), + DEFINE_PROP_STRING("size", IVShmemState, sizearg), + DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), + DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, + false), + DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), + DEFINE_PROP_STRING("shm", IVShmemState, shmobj), + DEFINE_PROP_STRING("role", IVShmemState, role), + DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void desugar_shm(IVShmemState *s) +{ + Object *obj; + char *path; + + obj = object_new("memory-backend-file"); + path = g_strdup_printf("/dev/shm/%s", s->shmobj); + object_property_set_str(obj, path, "mem-path", &error_abort); + g_free(path); + object_property_set_int(obj, s->legacy_size, "size", &error_abort); + object_property_set_bool(obj, true, "share", &error_abort); + object_property_add_child(OBJECT(s), "internal-shm-backend", obj, + &error_abort); + user_creatable_complete(obj, &error_abort); + s->hostmem = MEMORY_BACKEND(obj); +} + +static void ivshmem_realize(PCIDevice *dev, Error **errp) +{ + IVShmemState *s = IVSHMEM_COMMON(dev); + + if (!qtest_enabled()) { + error_report("ivshmem is deprecated, please use ivshmem-plain" + " or ivshmem-doorbell instead"); + } + + if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) { + error_setg(errp, + "You must specify either 'shm', 'chardev' or 'x-memdev'"); + return; + } + + if (s->hostmem) { + if (s->sizearg) { + g_warning("size argument ignored with hostmem"); + } + } else if (s->sizearg == NULL) { + s->legacy_size = 4 << 20; /* 4 MB default */ + } else { + char *end; + int64_t size = qemu_strtosz(s->sizearg, &end); + if (size < 0 || (size_t)size != size || *end != '\0' + || !is_power_of_2(size)) { + error_setg(errp, "Invalid size %s", s->sizearg); + return; + } + s->legacy_size = size; + } + + /* check that role is reasonable */ + if (s->role) { + if (strncmp(s->role, "peer", 5) == 0) { + s->master = ON_OFF_AUTO_OFF; + } else if (strncmp(s->role, "master", 7) == 0) { + s->master = ON_OFF_AUTO_ON; + } else { + error_setg(errp, "'role' must be 'peer' or 'master'"); + return; + } + } else { + s->master = ON_OFF_AUTO_AUTO; + } + + if (s->shmobj) { + desugar_shm(s); + } + + /* + * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a + * bald-faced lie then. But it's a backwards compatible lie. + */ + pci_config_set_interrupt_pin(dev->config, 1); + + ivshmem_common_realize(dev, errp); +} + +static void ivshmem_init(Object *obj) +{ + IVShmemState *s = IVSHMEM(obj); + + object_property_add_link(obj, "x-memdev", TYPE_MEMORY_BACKEND, + (Object **)&s->hostmem, + ivshmem_check_memdev_is_busy, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); +} + +static void ivshmem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = ivshmem_realize; + k->revision = 0; + dc->desc = "Inter-VM shared memory (legacy)"; + dc->props = ivshmem_properties; + dc->vmsd = &ivshmem_vmsd; +} + +static const TypeInfo ivshmem_info = { + .name = TYPE_IVSHMEM, + .parent = TYPE_IVSHMEM_COMMON, + .instance_size = sizeof(IVShmemState), + .instance_init = ivshmem_init, + .class_init = ivshmem_class_init, +}; + static void ivshmem_register_types(void) { type_register_static(&ivshmem_common_info); -- cgit 1.4.1 From 13fd2cb68953ac066116d444415f9d5ec7ce1200 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:53 +0100 Subject: ivshmem: Drop ivshmem property x-memdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ivshmem-plain instead. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-39-git-send-email-armbru@redhat.com> --- hw/misc/ivshmem.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'hw/misc/ivshmem.c') diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 527d6362dd..455206059f 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -1210,17 +1210,12 @@ static void ivshmem_realize(PCIDevice *dev, Error **errp) " or ivshmem-doorbell instead"); } - if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) { - error_setg(errp, - "You must specify either 'shm', 'chardev' or 'x-memdev'"); + if (!!s->server_chr + !!s->shmobj != 1) { + error_setg(errp, "You must specify either 'shm' or 'chardev'"); return; } - if (s->hostmem) { - if (s->sizearg) { - g_warning("size argument ignored with hostmem"); - } - } else if (s->sizearg == NULL) { + if (s->sizearg == NULL) { s->legacy_size = 4 << 20; /* 4 MB default */ } else { char *end; @@ -1260,17 +1255,6 @@ static void ivshmem_realize(PCIDevice *dev, Error **errp) ivshmem_common_realize(dev, errp); } -static void ivshmem_init(Object *obj) -{ - IVShmemState *s = IVSHMEM(obj); - - object_property_add_link(obj, "x-memdev", TYPE_MEMORY_BACKEND, - (Object **)&s->hostmem, - ivshmem_check_memdev_is_busy, - OBJ_PROP_LINK_UNREF_ON_RELEASE, - &error_abort); -} - static void ivshmem_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1287,7 +1271,6 @@ static const TypeInfo ivshmem_info = { .name = TYPE_IVSHMEM, .parent = TYPE_IVSHMEM_COMMON, .instance_size = sizeof(IVShmemState), - .instance_init = ivshmem_init, .class_init = ivshmem_class_init, }; -- cgit 1.4.1 From 62a830b688a93419baa89061b6b5faf8b5e10808 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 15 Mar 2016 19:34:54 +0100 Subject: ivshmem: Require master to have ID zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migration with ivshmem needs to be carefully orchestrated to work. Exactly one peer (the "master") migrates to the destination, all other peers need to unplug (and disconnect), migrate, plug back (and reconnect). This is sort of documented in qemu-doc. If peers connect on the destination before migration completes, the shared memory can get messed up. This isn't documented anywhere. Fix that in qemu-doc. To avoid messing up register IVPosition on migration, the server must assign the same ID on source and destination. ivshmem-spec.txt leaves ID assignment unspecified, however. Amend ivshmem-spec.txt to require the first client to receive ID zero. The example ivshmem-server complies: it always assigns the first unused ID. For a bit of additional safety, enforce ID zero for the master. This does nothing when we're not using a server, because the ID is zero for all peers then. Signed-off-by: Markus Armbruster Reviewed-by: Marc-André Lureau Message-Id: <1458066895-20632-40-git-send-email-armbru@redhat.com> --- docs/specs/ivshmem-spec.txt | 2 ++ hw/misc/ivshmem.c | 6 ++++++ qemu-doc.texi | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'hw/misc/ivshmem.c') diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt index f3912c0565..a1f5499796 100644 --- a/docs/specs/ivshmem-spec.txt +++ b/docs/specs/ivshmem-spec.txt @@ -164,6 +164,8 @@ For each new client that connects to the server, the server - sends interrupt setup messages to the new client (these contain file descriptors for receiving interrupts). +The first client to connect to the server receives ID zero. + When a client disconnects from the server, the server sends disconnect notifications to the other clients. diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 455206059f..132387f04b 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -887,6 +887,12 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) return; } + if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { + error_setg(errp, + "master must connect to the server before any peers"); + return; + } + qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read, NULL, s); diff --git a/qemu-doc.texi b/qemu-doc.texi index 0dd01c78fd..79141d3582 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1295,12 +1295,17 @@ When using the server, the guest will be assigned a VM ID (>=0) that allows gues using the same server to communicate via interrupts. Guests can read their VM ID from a device register (see ivshmem-spec.txt). +@subsubsection Migration with ivshmem + With device property @option{master=on}, the guest will copy the shared memory on migration to the destination host. With @option{master=off}, the guest will not be able to migrate with the device attached. In the latter case, the device should be detached and then reattached after migration using the PCI hotplug support. +At most one of the devices sharing the same memory can be master. The +master must complete migration before you plug back the other devices. + @subsubsection ivshmem and hugepages Instead of specifying the using POSIX shm, you may specify -- cgit 1.4.1