summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--hw/i386/kvm/pci-assign.c2
-rw-r--r--hw/net/vhost_net.c45
-rw-r--r--hw/net/vmxnet3.c2
-rw-r--r--hw/pci-bridge/ioh3420.c7
-rw-r--r--hw/pci-bridge/pci_bridge_dev.c4
-rw-r--r--hw/pci-bridge/xio3130_downstream.c6
-rw-r--r--hw/pci-bridge/xio3130_upstream.c3
-rw-r--r--hw/pci/msi.c18
-rw-r--r--hw/pci/pci.c82
-rw-r--r--include/hw/pci/pci.h26
-rw-r--r--include/net/net.h1
-rw-r--r--include/net/vhost-user.h1
-rw-r--r--include/net/vhost_net.h3
-rw-r--r--include/sysemu/char.h7
-rw-r--r--net/vhost-user.c32
-rw-r--r--qemu-char.c8
-rw-r--r--tests/Makefile.include2
-rw-r--r--tests/vhost-user-bridge.c51
-rw-r--r--tests/vhost-user-test.c136
20 files changed, 391 insertions, 46 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index df990a8ff6..fe2279e648 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -780,6 +780,7 @@ F: hw/ipack/
 
 PCI
 M: Michael S. Tsirkin <mst@redhat.com>
+M: Marcel Apfelbaum <marcel@redhat.com>
 S: Supported
 F: include/hw/pci/*
 F: hw/misc/pci-testdev.c
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index dd9e73b6f5..f9c901471d 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1481,7 +1481,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
          * error bits, leave the rest. */
         status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
         status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
-        status |= pci_requester_id(pci_dev);
+        status |= pci_get_bdf(pci_dev);
         status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
                     PCI_X_STATUS_SPL_ERR);
         pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6e1032fc18..50f4dcd655 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -120,6 +120,11 @@ uint64_t vhost_net_get_max_queues(VHostNetState *net)
     return net->dev.max_queues;
 }
 
+uint64_t vhost_net_get_acked_features(VHostNetState *net)
+{
+    return net->dev.acked_features;
+}
+
 static int vhost_net_get_fd(NetClientState *backend)
 {
     switch (backend->info->type) {
@@ -136,6 +141,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
     int r;
     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
     struct vhost_net *net = g_malloc(sizeof *net);
+    uint64_t features = 0;
 
     if (!options->net_backend) {
         fprintf(stderr, "vhost-net requires net backend to be setup\n");
@@ -183,8 +189,21 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
             goto fail;
         }
     }
+
     /* Set sane init value. Override when guest acks. */
-    vhost_net_ack_features(net, 0);
+    if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) {
+        features = vhost_user_get_acked_features(net->nc);
+        if (~net->dev.features & features) {
+            fprintf(stderr, "vhost lacks feature mask %" PRIu64
+                    " for backend\n",
+                    (uint64_t)(~net->dev.features & features));
+            vhost_dev_cleanup(&net->dev);
+            goto fail;
+        }
+    }
+
+    vhost_net_ack_features(net, features);
+
     return net;
 fail:
     g_free(net);
@@ -310,6 +329,15 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
         if (r < 0) {
             goto err_start;
         }
+
+        if (ncs[i].peer->vring_enable) {
+            /* restore vring enable state */
+            r = vhost_set_vring_enable(ncs[i].peer, ncs[i].peer->vring_enable);
+
+            if (r < 0) {
+                goto err_start;
+            }
+        }
     }
 
     return 0;
@@ -401,8 +429,15 @@ VHostNetState *get_vhost_net(NetClientState *nc)
 int vhost_set_vring_enable(NetClientState *nc, int enable)
 {
     VHostNetState *net = get_vhost_net(nc);
-    const VhostOps *vhost_ops = net->dev.vhost_ops;
+    const VhostOps *vhost_ops;
 
+    nc->vring_enable = enable;
+
+    if (!net) {
+        return 0;
+    }
+
+    vhost_ops = net->dev.vhost_ops;
     if (vhost_ops->vhost_set_vring_enable) {
         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
     }
@@ -442,10 +477,16 @@ uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
 {
     return features;
 }
+
 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
 {
 }
 
+uint64_t vhost_net_get_acked_features(VHostNetState *net)
+{
+    return 0;
+}
+
 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
 {
     return false;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 16645e6c23..d97897670d 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -348,7 +348,7 @@ typedef struct {
 /* Interrupt management */
 
 /*
- *This function returns sign whether interrupt line is in asserted state
+ * This function returns sign whether interrupt line is in asserted state
  * This depends on the type of interrupt used. For INTX interrupt line will
  * be asserted until explicit deassertion, for MSI(X) interrupt line will
  * be deasserted automatically due to notification semantics of the MSI(X)
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index 0937fa34be..b4a7806e2e 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -106,12 +106,14 @@ static int ioh3420_initfn(PCIDevice *d)
     if (rc < 0) {
         goto err_bridge;
     }
+
     rc = msi_init(d, IOH_EP_MSI_OFFSET, IOH_EP_MSI_NR_VECTOR,
                   IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
                   IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
     if (rc < 0) {
         goto err_bridge;
     }
+
     rc = pcie_cap_init(d, IOH_EP_EXP_OFFSET, PCI_EXP_TYPE_ROOT_PORT, p->port);
     if (rc < 0) {
         goto err_msi;
@@ -120,18 +122,21 @@ static int ioh3420_initfn(PCIDevice *d)
     pcie_cap_arifwd_init(d);
     pcie_cap_deverr_init(d);
     pcie_cap_slot_init(d, s->slot);
+    pcie_cap_root_init(d);
+
     pcie_chassis_create(s->chassis);
     rc = pcie_chassis_add_slot(s);
     if (rc < 0) {
         goto err_pcie_cap;
     }
-    pcie_cap_root_init(d);
+
     rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
     pcie_aer_root_init(d);
     ioh3420_aer_vector_update(d);
+
     return 0;
 
 err:
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 7b582e96ac..41ca47b15a 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -68,10 +68,12 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
         /* MSI is not applicable without SHPC */
         bridge_dev->flags &= ~(1 << PCI_BRIDGE_DEV_F_MSI_REQ);
     }
+
     err = slotid_cap_init(dev, 0, bridge_dev->chassis_nr, 0);
     if (err) {
         goto slotid_error;
     }
+
     if ((bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_MSI_REQ)) &&
         msi_nonbroken) {
         err = msi_init(dev, 0, 1, true, true);
@@ -79,6 +81,7 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
             goto msi_error;
         }
     }
+
     if (shpc_present(dev)) {
         /* TODO: spec recommends using 64 bit prefetcheable BAR.
          * Check whether that works well. */
@@ -86,6 +89,7 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
                          PCI_BASE_ADDRESS_MEM_TYPE_64, &bridge_dev->bar);
     }
     return 0;
+
 msi_error:
     slotid_cap_cleanup(dev);
 slotid_error:
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index cf1ee63aba..e6d653de4f 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -70,11 +70,13 @@ static int xio3130_downstream_initfn(PCIDevice *d)
     if (rc < 0) {
         goto err_bridge;
     }
+
     rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
                                XIO3130_SSVID_SVID, XIO3130_SSVID_SSID);
     if (rc < 0) {
         goto err_bridge;
     }
+
     rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_DOWNSTREAM,
                        p->port);
     if (rc < 0) {
@@ -83,12 +85,14 @@ static int xio3130_downstream_initfn(PCIDevice *d)
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
     pcie_cap_slot_init(d, s->slot);
+    pcie_cap_arifwd_init(d);
+
     pcie_chassis_create(s->chassis);
     rc = pcie_chassis_add_slot(s);
     if (rc < 0) {
         goto err_pcie_cap;
     }
-    pcie_cap_arifwd_init(d);
+
     rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index 164ef58c46..d97684474f 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -66,11 +66,13 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     if (rc < 0) {
         goto err_bridge;
     }
+
     rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
                                XIO3130_SSVID_SVID, XIO3130_SSVID_SSID);
     if (rc < 0) {
         goto err_bridge;
     }
+
     rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_UPSTREAM,
                        p->port);
     if (rc < 0) {
@@ -78,6 +80,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     }
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
+
     rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index a87ef4d752..ed792251dd 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -165,6 +165,21 @@ bool msi_enabled(const PCIDevice *dev)
          PCI_MSI_FLAGS_ENABLE);
 }
 
+/*
+ * Make PCI device @dev MSI-capable.
+ * Non-zero @offset puts capability MSI at that offset in PCI config
+ * space.
+ * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
+ * If @msi64bit, make the device capable of sending a 64-bit message
+ * address.
+ * If @msi_per_vector_mask, make the device support per-vector masking.
+ * Return 0 on success, return -errno on error.
+ *
+ * -ENOTSUP means lacking msi support for a msi-capable platform.
+ * -EINVAL means capability overlap, happens when @offset is non-zero,
+ *  also means a programming error, except device assignment, which can check
+ *  if a real HW is broken.
+ */
 int msi_init(struct PCIDevice *dev, uint8_t offset,
              unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask)
 {
@@ -220,7 +235,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
         pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit),
                      0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
     }
-    return config_offset;
+
+    return 0;
 }
 
 void msi_uninit(struct PCIDevice *dev)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bb605efae0..4b585f47b6 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -836,6 +836,81 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
     address_space_destroy(&pci_dev->bus_master_as);
 }
 
+/* Extract PCIReqIDCache into BDF format */
+static uint16_t pci_req_id_cache_extract(PCIReqIDCache *cache)
+{
+    uint8_t bus_n;
+    uint16_t result;
+
+    switch (cache->type) {
+    case PCI_REQ_ID_BDF:
+        result = pci_get_bdf(cache->dev);
+        break;
+    case PCI_REQ_ID_SECONDARY_BUS:
+        bus_n = pci_bus_num(cache->dev->bus);
+        result = PCI_BUILD_BDF(bus_n, 0);
+        break;
+    default:
+        error_printf("Invalid PCI requester ID cache type: %d\n",
+                     cache->type);
+        exit(1);
+        break;
+    }
+
+    return result;
+}
+
+/* Parse bridges up to the root complex and return requester ID
+ * cache for specific device.  For full PCIe topology, the cache
+ * result would be exactly the same as getting BDF of the device.
+ * However, several tricks are required when system mixed up with
+ * legacy PCI devices and PCIe-to-PCI bridges.
+ *
+ * Here we cache the proxy device (and type) not requester ID since
+ * bus number might change from time to time.
+ */
+static PCIReqIDCache pci_req_id_cache_get(PCIDevice *dev)
+{
+    PCIDevice *parent;
+    PCIReqIDCache cache = {
+        .dev = dev,
+        .type = PCI_REQ_ID_BDF,
+    };
+
+    while (!pci_bus_is_root(dev->bus)) {
+        /* We are under PCI/PCIe bridges */
+        parent = dev->bus->parent_dev;
+        if (pci_is_express(parent)) {
+            if (pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
+                /* When we pass through PCIe-to-PCI/PCIX bridges, we
+                 * override the requester ID using secondary bus
+                 * number of parent bridge with zeroed devfn
+                 * (pcie-to-pci bridge spec chap 2.3). */
+                cache.type = PCI_REQ_ID_SECONDARY_BUS;
+                cache.dev = dev;
+            }
+        } else {
+            /* Legacy PCI, override requester ID with the bridge's
+             * BDF upstream.  When the root complex connects to
+             * legacy PCI devices (including buses), it can only
+             * obtain requester ID info from directly attached
+             * devices.  If devices are attached under bridges, only
+             * the requester ID of the bridge that is directly
+             * attached to the root complex can be recognized. */
+            cache.type = PCI_REQ_ID_BDF;
+            cache.dev = parent;
+        }
+        dev = parent;
+    }
+
+    return cache;
+}
+
+uint16_t pci_requester_id(PCIDevice *dev)
+{
+    return pci_req_id_cache_extract(&dev->requester_id_cache);
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -885,6 +960,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     }
 
     pci_dev->devfn = devfn;
+    pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
     dma_as = pci_device_iommu_address_space(pci_dev);
 
     memory_region_init_alias(&pci_dev->bus_master_enable_region,
@@ -2152,10 +2228,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
 
     if (!offset) {
         offset = pci_find_space(pdev, size);
-        if (!offset) {
-            error_setg(errp, "out of PCI config space");
-            return -ENOSPC;
-        }
+        /* out of PCI config space is programming error */
+        assert(offset);
     } else {
         /* Verify that capabilities don't overlap.  Note: device assignment
          * depends on this check to verify that the device is not broken.
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4420f47598..9ed1624f09 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -15,6 +15,7 @@
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
 #define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
 #define PCI_FUNC(devfn)         ((devfn) & 0x07)
+#define PCI_BUILD_BDF(bus, devfn)     ((bus << 8) | (devfn))
 #define PCI_SLOT_MAX            32
 #define PCI_FUNC_MAX            8
 
@@ -230,6 +231,20 @@ typedef void (*MSIVectorPollNotifier)(PCIDevice *dev,
                                       unsigned int vector_start,
                                       unsigned int vector_end);
 
+enum PCIReqIDType {
+    PCI_REQ_ID_INVALID = 0,
+    PCI_REQ_ID_BDF,
+    PCI_REQ_ID_SECONDARY_BUS,
+    PCI_REQ_ID_MAX,
+};
+typedef enum PCIReqIDType PCIReqIDType;
+
+struct PCIReqIDCache {
+    PCIDevice *dev;
+    PCIReqIDType type;
+};
+typedef struct PCIReqIDCache PCIReqIDCache;
+
 struct PCIDevice {
     DeviceState qdev;
 
@@ -252,6 +267,11 @@ struct PCIDevice {
     /* the following fields are read only */
     PCIBus *bus;
     int32_t devfn;
+    /* Cached device to fetch requester ID from, to avoid the PCI
+     * tree walking every time we invoke PCI request (e.g.,
+     * MSI). For conventional PCI root complex, this field is
+     * meaningless. */
+    PCIReqIDCache requester_id_cache;
     char name[64];
     PCIIORegion io_regions[PCI_NUM_REGIONS];
     AddressSpace bus_master_as;
@@ -692,11 +712,13 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
     return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
 }
 
-static inline uint16_t pci_requester_id(PCIDevice *dev)
+static inline uint16_t pci_get_bdf(PCIDevice *dev)
 {
-    return (pci_bus_num(dev->bus) << 8) | dev->devfn;
+    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
 }
 
+uint16_t pci_requester_id(PCIDevice *dev);
+
 /* DMA access functions */
 static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
 {
diff --git a/include/net/net.h b/include/net/net.h
index a69e382ba7..a5c5095154 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -99,6 +99,7 @@ struct NetClientState {
     NetClientDestructor *destructor;
     unsigned int queue_index;
     unsigned rxfilter_notify_enabled:1;
+    int vring_enable;
     QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
 };
 
diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 85109f63aa..efae35d57e 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -13,5 +13,6 @@
 
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
+uint64_t vhost_user_get_acked_features(NetClientState *nc);
 
 #endif /* VHOST_USER_H_ */
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 3389b410d8..0bd48770d8 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -31,4 +31,7 @@ int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
 VHostNetState *get_vhost_net(NetClientState *nc);
 
 int vhost_set_vring_enable(NetClientState * nc, int enable);
+
+uint64_t vhost_net_get_acked_features(VHostNetState *net);
+
 #endif
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 372a6fd40b..1eb2d0f309 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -75,6 +75,7 @@ struct CharDriverState {
     IOReadHandler *chr_read;
     void *handler_opaque;
     void (*chr_close)(struct CharDriverState *chr);
+    void (*chr_disconnect)(struct CharDriverState *chr);
     void (*chr_accept_input)(struct CharDriverState *chr);
     void (*chr_set_echo)(struct CharDriverState *chr, bool echo);
     void (*chr_set_fe_open)(struct CharDriverState *chr, int fe_open);
@@ -143,6 +144,12 @@ void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend);
  */
 CharDriverState *qemu_chr_new(const char *label, const char *filename,
                               void (*init)(struct CharDriverState *s));
+/**
+ * @qemu_chr_disconnect:
+ *
+ * Close a fd accpeted by character backend.
+ */
+void qemu_chr_disconnect(CharDriverState *chr);
 
 /**
  * @qemu_chr_new_noreplay:
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 1b9e73a2dc..d72ce9b490 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -22,6 +22,8 @@ typedef struct VhostUserState {
     NetClientState nc;
     CharDriverState *chr;
     VHostNetState *vhost_net;
+    int watch;
+    uint64_t acked_features;
 } VhostUserState;
 
 typedef struct VhostUserChardevProps {
@@ -36,6 +38,13 @@ VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
     return s->vhost_net;
 }
 
+uint64_t vhost_user_get_acked_features(NetClientState *nc)
+{
+    VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+    return s->acked_features;
+}
+
 static int vhost_user_running(VhostUserState *s)
 {
     return (s->vhost_net) ? 1 : 0;
@@ -55,6 +64,8 @@ static void vhost_user_stop(int queues, NetClientState *ncs[])
         }
 
         if (s->vhost_net) {
+            /* save acked features */
+            s->acked_features = vhost_net_get_acked_features(s->vhost_net);
             vhost_net_cleanup(s->vhost_net);
             s->vhost_net = NULL;
         }
@@ -167,6 +178,20 @@ static NetClientInfo net_vhost_user_info = {
         .has_ufo = vhost_user_has_ufo,
 };
 
+static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
+                                           void *opaque)
+{
+    VhostUserState *s = opaque;
+    uint8_t buf[1];
+
+    /* We don't actually want to read anything, but CHR_EVENT_CLOSED will be
+     * raised as a side-effect of the read.
+     */
+    qemu_chr_fe_read_all(s->chr, buf, sizeof(buf));
+
+    return FALSE;
+}
+
 static void net_vhost_user_event(void *opaque, int event)
 {
     const char *name = opaque;
@@ -184,14 +209,19 @@ static void net_vhost_user_event(void *opaque, int event)
     trace_vhost_user_event(s->chr->label, event);
     switch (event) {
     case CHR_EVENT_OPENED:
+        s->watch = qemu_chr_fe_add_watch(s->chr, G_IO_HUP,
+                                         net_vhost_user_watch, s);
         if (vhost_user_start(queues, ncs) < 0) {
-            exit(1);
+            qemu_chr_disconnect(s->chr);
+            return;
         }
         qmp_set_link(name, true, &err);
         break;
     case CHR_EVENT_CLOSED:
         qmp_set_link(name, false, &err);
         vhost_user_stop(queues, ncs);
+        g_source_remove(s->watch);
+        s->watch = 0;
         break;
     }
 
diff --git a/qemu-char.c b/qemu-char.c
index c926e9a379..84f49acbac 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -4011,6 +4011,13 @@ void qemu_chr_fe_release(CharDriverState *s)
     s->avail_connections++;
 }
 
+void qemu_chr_disconnect(CharDriverState *chr)
+{
+    if (chr->chr_disconnect) {
+        chr->chr_disconnect(chr);
+    }
+}
+
 static void qemu_chr_free_common(CharDriverState *chr)
 {
     g_free(chr->filename);
@@ -4388,6 +4395,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
     chr->chr_write = tcp_chr_write;
     chr->chr_sync_read = tcp_chr_sync_read;
     chr->chr_close = tcp_chr_close;
+    chr->chr_disconnect = tcp_chr_disconnect;
     chr->get_msgfds = tcp_get_msgfds;
     chr->set_msgfds = tcp_set_msgfds;
     chr->chr_add_client = tcp_chr_add_client;
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 33fcdcb44e..a2ed83bbb4 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -235,7 +235,7 @@ check-qtest-i386-y += tests/test-netfilter$(EXESUF)
 check-qtest-i386-y += tests/test-filter-mirror$(EXESUF)
 check-qtest-i386-y += tests/test-filter-redirector$(EXESUF)
 check-qtest-i386-y += tests/postcopy-test$(EXESUF)
-check-qtest-x86_64-y = $(check-qtest-i386-y)
+check-qtest-x86_64-y += $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
 check-qtest-mips-y = tests/endianness-test$(EXESUF)
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 57c8f91c5e..45fa2b6148 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -945,6 +945,13 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
     DPRINT("    vring_avail at %p\n", vq->avail);
 
     vq->last_used_index = vq->used->idx;
+
+    if (vq->last_avail_index != vq->used->idx) {
+        DPRINT("Last avail index != used index: %d != %d, resuming",
+               vq->last_avail_index, vq->used->idx);
+        vq->last_avail_index = vq->used->idx;
+    }
+
     return 0;
 }
 
@@ -1203,12 +1210,13 @@ vubr_accept_cb(int sock, void *ctx)
 }
 
 static VubrDev *
-vubr_new(const char *path)
+vubr_new(const char *path, bool client)
 {
     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
     dev->nregions = 0;
     int i;
     struct sockaddr_un un;
+    CallbackFunc cb;
     size_t len;
 
     for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
@@ -1237,21 +1245,30 @@ vubr_new(const char *path)
     un.sun_family = AF_UNIX;
     strcpy(un.sun_path, path);
     len = sizeof(un.sun_family) + strlen(path);
-    unlink(path);
 
-    if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
-        vubr_die("bind");
-    }
+    if (!client) {
+        unlink(path);
+
+        if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
+            vubr_die("bind");
+        }
+
+        if (listen(dev->sock, 1) == -1) {
+            vubr_die("listen");
+        }
+        cb = vubr_accept_cb;
 
-    if (listen(dev->sock, 1) == -1) {
-        vubr_die("listen");
+        DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
+    } else {
+        if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
+            vubr_die("connect");
+        }
+        cb = vubr_receive_cb;
     }
 
     dispatcher_init(&dev->dispatcher);
-    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
-                   vubr_accept_cb);
+    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
 
-    DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
     return dev;
 }
 
@@ -1368,8 +1385,9 @@ main(int argc, char *argv[])
 {
     VubrDev *dev;
     int opt;
+    bool client = false;
 
-    while ((opt = getopt(argc, argv, "l:r:u:")) != -1) {
+    while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
 
         switch (opt) {
         case 'l':
@@ -1385,16 +1403,20 @@ main(int argc, char *argv[])
         case 'u':
             ud_socket_path = strdup(optarg);
             break;
+        case 'c':
+            client = true;
+            break;
         default:
             goto out;
         }
     }
 
-    DPRINT("ud socket: %s\n", ud_socket_path);
+    DPRINT("ud socket: %s (%s)\n", ud_socket_path,
+           client ? "client" : "server");
     DPRINT("local:     %s:%s\n", lhost, lport);
     DPRINT("remote:    %s:%s\n", rhost, rport);
 
-    dev = vubr_new(ud_socket_path);
+    dev = vubr_new(ud_socket_path, client);
     if (!dev) {
         return 1;
     }
@@ -1405,13 +1427,14 @@ main(int argc, char *argv[])
 
 out:
     fprintf(stderr, "Usage: %s ", argv[0]);
-    fprintf(stderr, "[-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
+    fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
     fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
             DEFAULT_UD_SOCKET);
     fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
             DEFAULT_LHOST, DEFAULT_LPORT);
     fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
             DEFAULT_RHOST, DEFAULT_RPORT);
+    fprintf(stderr, "\t-c client mode\n");
 
     return 1;
 }
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index b2b1665ab8..8b2164b99d 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -32,7 +32,7 @@
 #define QEMU_CMD_ACCEL  " -machine accel=tcg"
 #define QEMU_CMD_MEM    " -m %d -object memory-backend-file,id=mem,size=%dM,"\
                         "mem-path=%s,share=on -numa node,memdev=mem"
-#define QEMU_CMD_CHR    " -chardev socket,id=%s,path=%s"
+#define QEMU_CMD_CHR    " -chardev socket,id=%s,path=%s%s"
 #define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=%s,vhostforce"
 #define QEMU_CMD_NET    " -device virtio-net-pci,netdev=net0,romfile=./pc-bios/pxe-virtio.rom"
 
@@ -244,7 +244,12 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
 
     if (msg.size) {
         p += VHOST_USER_HDR_SIZE;
-        g_assert_cmpint(qemu_chr_fe_read_all(chr, p, msg.size), ==, msg.size);
+        size = qemu_chr_fe_read_all(chr, p, msg.size);
+        if (size != msg.size) {
+            g_test_message("Wrong message size received %d != %d\n",
+                           size, msg.size);
+            return;
+        }
     }
 
     switch (msg.request) {
@@ -361,17 +366,10 @@ static const char *init_hugepagefs(const char *path)
 static TestServer *test_server_new(const gchar *name)
 {
     TestServer *server = g_new0(TestServer, 1);
-    gchar *chr_path;
 
     server->socket_path = g_strdup_printf("%s/%s.sock", tmpfs, name);
     server->mig_path = g_strdup_printf("%s/%s.mig", tmpfs, name);
-
-    chr_path = g_strdup_printf("unix:%s,server,nowait", server->socket_path);
     server->chr_name = g_strdup_printf("chr-%s", name);
-    server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
-    g_free(chr_path);
-
-    qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);
 
     g_mutex_init(&server->data_mutex);
     g_cond_init(&server->data_cond);
@@ -381,13 +379,34 @@ static TestServer *test_server_new(const gchar *name)
     return server;
 }
 
-#define GET_QEMU_CMD(s)                                                        \
-    g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name,                 \
-                    (s)->socket_path, (s)->chr_name)
+static void test_server_create_chr(TestServer *server, const gchar *opt)
+{
+    gchar *chr_path;
 
-#define GET_QEMU_CMDE(s, mem, extra, ...)                                      \
-    g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name,       \
-                    (s)->socket_path, (s)->chr_name, ##__VA_ARGS__)
+    chr_path = g_strdup_printf("unix:%s%s", server->socket_path, opt);
+    server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
+    g_free(chr_path);
+
+    qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);
+}
+
+static void test_server_listen(TestServer *server)
+{
+    test_server_create_chr(server, ",server,nowait");
+}
+
+static inline void test_server_connect(TestServer *server)
+{
+    test_server_create_chr(server, ",reconnect=1");
+}
+
+#define GET_QEMU_CMD(s)                                         \
+    g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name,  \
+                    (s)->socket_path, "", (s)->chr_name)
+
+#define GET_QEMU_CMDE(s, mem, chr_opts, extra, ...)                     \
+    g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \
+                    (s)->socket_path, (chr_opts), (s)->chr_name, ##__VA_ARGS__)
 
 static gboolean _test_server_free(TestServer *server)
 {
@@ -535,7 +554,10 @@ static void test_migrate(void)
     guint8 *log;
     guint64 size;
 
-    cmd = GET_QEMU_CMDE(s, 2, "");
+    test_server_listen(s);
+    test_server_listen(dest);
+
+    cmd = GET_QEMU_CMDE(s, 2, "", "");
     from = qtest_start(cmd);
     g_free(cmd);
 
@@ -543,7 +565,7 @@ static void test_migrate(void)
     size = get_log_size(s);
     g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8));
 
-    cmd = GET_QEMU_CMDE(dest, 2, " -incoming %s", uri);
+    cmd = GET_QEMU_CMDE(dest, 2, "", " -incoming %s", uri);
     to = qtest_init(cmd);
     g_free(cmd);
 
@@ -603,6 +625,80 @@ static void test_migrate(void)
     global_qtest = global;
 }
 
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+static void wait_for_rings_started(TestServer *s, size_t count)
+{
+    gint64 end_time;
+
+    g_mutex_lock(&s->data_mutex);
+    end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
+    while (ctpop64(s->rings) != count) {
+        if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) {
+            /* timeout has passed */
+            g_assert_cmpint(ctpop64(s->rings), ==, count);
+            break;
+        }
+    }
+
+    g_mutex_unlock(&s->data_mutex);
+}
+
+static gboolean
+reconnect_cb(gpointer user_data)
+{
+    TestServer *s = user_data;
+
+    qemu_chr_disconnect(s->chr);
+
+    return FALSE;
+}
+
+static gpointer
+connect_thread(gpointer data)
+{
+    TestServer *s = data;
+
+    /* wait for qemu to start before first try, to avoid extra warnings */
+    g_usleep(G_USEC_PER_SEC);
+    test_server_connect(s);
+
+    return NULL;
+}
+
+static void test_reconnect_subprocess(void)
+{
+    TestServer *s = test_server_new("reconnect");
+    char *cmd;
+
+    g_thread_new("connect", connect_thread, s);
+    cmd = GET_QEMU_CMDE(s, 2, ",server", "");
+    qtest_start(cmd);
+    g_free(cmd);
+
+    wait_for_fds(s);
+    wait_for_rings_started(s, 2);
+
+    /* reconnect */
+    s->fds_num = 0;
+    s->rings = 0;
+    g_idle_add(reconnect_cb, s);
+    wait_for_fds(s);
+    wait_for_rings_started(s, 2);
+
+    qtest_end();
+    test_server_free(s);
+    return;
+}
+
+static void test_reconnect(void)
+{
+    gchar *path = g_strdup_printf("/%s/vhost-user/reconnect/subprocess",
+                                  qtest_get_arch());
+    g_test_trap_subprocess(path, 0, 0);
+    g_test_trap_assert_passed();
+}
+#endif
+
 int main(int argc, char **argv)
 {
     QTestState *s = NULL;
@@ -634,6 +730,7 @@ int main(int argc, char **argv)
     }
 
     server = test_server_new("test");
+    test_server_listen(server);
 
     loop = g_main_loop_new(NULL, FALSE);
     /* run the main loop thread so the chardev may operate */
@@ -646,6 +743,11 @@ int main(int argc, char **argv)
 
     qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
     qtest_add_func("/vhost-user/migrate", test_migrate);
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+    qtest_add_func("/vhost-user/reconnect/subprocess",
+                   test_reconnect_subprocess);
+    qtest_add_func("/vhost-user/reconnect", test_reconnect);
+#endif
 
     ret = g_test_run();