Diffstat (limited to 'net')
-rw-r--r--  net/Makefile.objs     |   5
-rw-r--r--  net/announce.c        | 140
-rw-r--r--  net/colo-compare.c    |   8
-rw-r--r--  net/net.c             |   2
-rw-r--r--  net/netmap.c          | 110
-rw-r--r--  net/trace-events      |   3
-rw-r--r--  net/vhost-user-stub.c |  23
-rw-r--r--  net/vhost-user.c      |  13
8 files changed, 231 insertions(+), 73 deletions(-)
diff --git a/net/Makefile.objs b/net/Makefile.objs
index b2bf88a0ef..8262f033b9 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,8 +2,11 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
+common-obj-y += announce.o
 common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
-common-obj-$(CONFIG_POSIX) += vhost-user.o
+common-obj-$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET_USER)) += vhost-user.o
+common-obj-$(call land,$(call lnot,$(CONFIG_VIRTIO_NET)),$(CONFIG_VHOST_NET_USER)) += vhost-user-stub.o
+common-obj-$(CONFIG_ALL) += vhost-user-stub.o
 common-obj-$(CONFIG_SLIRP) += slirp.o
 common-obj-$(CONFIG_VDE) += vde.o
 common-obj-$(CONFIG_NETMAP) += netmap.o
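
For illustration only (not part of the patch): the two $(call land, ...) lines select between the real vhost-user backend and a stub. vhost-user.o is built only when both CONFIG_VIRTIO_NET and CONFIG_VHOST_NET_USER are set, and vhost-user-stub.o takes its place when CONFIG_VHOST_NET_USER is set without CONFIG_VIRTIO_NET (the extra CONFIG_ALL line is left out of this sketch). The same selection logic as a small C truth-table program:

#include <stdbool.h>
#include <stdio.h>

/* Which object the Makefile rules above would pick, per config combination. */
static const char *vhost_user_object(bool virtio_net, bool vhost_net_user)
{
    if (vhost_net_user && virtio_net) {
        return "vhost-user.o";        /* land(VIRTIO_NET, VHOST_NET_USER)       */
    }
    if (vhost_net_user && !virtio_net) {
        return "vhost-user-stub.o";   /* land(lnot(VIRTIO_NET), VHOST_NET_USER) */
    }
    return "(neither)";
}

int main(void)
{
    for (int v = 0; v <= 1; v++) {
        for (int u = 0; u <= 1; u++) {
            printf("CONFIG_VIRTIO_NET=%d CONFIG_VHOST_NET_USER=%d -> %s\n",
                   v, u, vhost_user_object(v, u));
        }
    }
    return 0;
}
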
diff --git a/net/announce.c b/net/announce.c
new file mode 100644
index 0000000000..91e9a6e267
--- /dev/null
+++ b/net/announce.c
@@ -0,0 +1,140 @@
+/*
+ *  Self-announce
+ *  (c) 2017-2019 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "net/announce.h"
+#include "net/net.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-net.h"
+#include "qapi/qapi-commands-net.h"
+#include "trace.h"
+
+int64_t qemu_announce_timer_step(AnnounceTimer *timer)
+{
+    int64_t step;
+
+    step =  timer->params.initial +
+            (timer->params.rounds - timer->round - 1) *
+            timer->params.step;
+
+    if (step < 0 || step > timer->params.max) {
+        step = timer->params.max;
+    }
+    timer_mod(timer->tm, qemu_clock_get_ms(timer->type) + step);
+
+    return step;
+}
+
+void qemu_announce_timer_del(AnnounceTimer *timer)
+{
+    if (timer->tm) {
+        timer_del(timer->tm);
+        timer_free(timer->tm);
+        timer->tm = NULL;
+    }
+}
+
+/*
+ * Under BQL/main thread
+ * Reset the timer to the given parameters/type/notifier.
+ */
+void qemu_announce_timer_reset(AnnounceTimer *timer,
+                               AnnounceParameters *params,
+                               QEMUClockType type,
+                               QEMUTimerCB *cb,
+                               void *opaque)
+{
+    /*
+     * We're under the BQL, so the current timer can't
+     * be firing, so we should be able to delete it.
+     */
+    qemu_announce_timer_del(timer);
+
+    QAPI_CLONE_MEMBERS(AnnounceParameters, &timer->params, params);
+    timer->round = params->rounds;
+    timer->type = type;
+    timer->tm = timer_new_ms(type, cb, opaque);
+}
+
+#ifndef ETH_P_RARP
+#define ETH_P_RARP 0x8035
+#endif
+#define ARP_HTYPE_ETH 0x0001
+#define ARP_PTYPE_IP 0x0800
+#define ARP_OP_REQUEST_REV 0x3
+
+static int announce_self_create(uint8_t *buf,
+                                uint8_t *mac_addr)
+{
+    /* Ethernet header. */
+    memset(buf, 0xff, 6);         /* destination MAC addr */
+    memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
+    *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
+
+    /* RARP header. */
+    *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
+    *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
+    *(buf + 18) = 6; /* hardware addr length (ethernet) */
+    *(buf + 19) = 4; /* protocol addr length (IPv4) */
+    *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
+    memcpy(buf + 22, mac_addr, 6); /* source hw addr */
+    memset(buf + 28, 0x00, 4);     /* source protocol addr */
+    memcpy(buf + 32, mac_addr, 6); /* target hw addr */
+    memset(buf + 38, 0x00, 4);     /* target protocol addr */
+
+    /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
+    memset(buf + 42, 0x00, 18);
+
+    return 60; /* len (FCS will be added by hardware) */
+}
+
+static void qemu_announce_self_iter(NICState *nic, void *opaque)
+{
+    uint8_t buf[60];
+    int len;
+
+    trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
+    len = announce_self_create(buf, nic->conf->macaddr.a);
+
+    qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
+
+    /* if the NIC provides its own announcement support, use it as well */
+    if (nic->ncs->info->announce) {
+        nic->ncs->info->announce(nic->ncs);
+    }
+}
+static void qemu_announce_self_once(void *opaque)
+{
+    AnnounceTimer *timer = (AnnounceTimer *)opaque;
+
+    qemu_foreach_nic(qemu_announce_self_iter, NULL);
+
+    if (--timer->round) {
+        qemu_announce_timer_step(timer);
+    } else {
+        qemu_announce_timer_del(timer);
+    }
+}
+
+void qemu_announce_self(AnnounceTimer *timer, AnnounceParameters *params)
+{
+    qemu_announce_timer_reset(timer, params, QEMU_CLOCK_REALTIME,
+                              qemu_announce_self_once, timer);
+    if (params->rounds) {
+        qemu_announce_self_once(timer);
+    } else {
+        qemu_announce_timer_del(timer);
+    }
+}
+
+void qmp_announce_self(AnnounceParameters *params, Error **errp)
+{
+    static AnnounceTimer announce_timer;
+    qemu_announce_self(&announce_timer, params);
+}
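
As a side note (not part of the patch), the delay sequence produced by qemu_announce_timer_step() together with the --timer->round decrement in qemu_announce_self_once() can be previewed with the standalone sketch below. The parameter values (initial=50, step=100, max=550, rounds=5) are illustrative assumptions, not necessarily QEMU's defaults.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Illustrative values, not necessarily QEMU's defaults. */
    const int64_t initial = 50, step = 100, max = 550;
    const int rounds = 5;
    int round = rounds;

    /* qemu_announce_self() fires the first announcement immediately. */
    printf("announcement 1: sent immediately\n");

    while (--round) {
        /* Same arithmetic as qemu_announce_timer_step(). */
        int64_t delay = initial + (rounds - round - 1) * step;
        if (delay < 0 || delay > max) {
            delay = max;
        }
        printf("announcement %d: sent %" PRId64 " ms after the previous one\n",
               rounds - round + 1, delay);
    }
    return 0;
}

With these values the five announcements go out immediately and then after 50, 150, 250 and 350 ms, with the max field capping the delay if initial + n*step would overshoot it.
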
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 3e515f3023..bf10526f05 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -294,14 +294,6 @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
             return true;
         }
     }
-    if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
-        if (colo_compare_packet_payload(ppkt, spkt,
-                                        ppkt->header_size, spkt->header_size,
-                                        ppkt->payload_size)) {
-            *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
-            return true;
-        }
-    }
 
     /* one part of secondary packet payload still need to be compared */
     if (!after(ppkt->seq_end, spkt->seq_end)) {
diff --git a/net/net.c b/net/net.c
index 5dcff7fe2a..f3a3c5444c 100644
--- a/net/net.c
+++ b/net/net.c
@@ -961,7 +961,7 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
         [NET_CLIENT_DRIVER_BRIDGE]    = net_init_bridge,
 #endif
         [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
-#ifdef CONFIG_VHOST_NET_USED
+#ifdef CONFIG_VHOST_NET_USER
         [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
 #endif
 #ifdef CONFIG_L2TPV3
diff --git a/net/netmap.c b/net/netmap.c
index 2d11a8f4be..0cc8f545c5 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -154,65 +154,27 @@ static void netmap_writable(void *opaque)
     qemu_flush_queued_packets(&s->nc);
 }
 
-static ssize_t netmap_receive(NetClientState *nc,
-      const uint8_t *buf, size_t size)
-{
-    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
-    struct netmap_ring *ring = s->tx;
-    uint32_t i;
-    uint32_t idx;
-    uint8_t *dst;
-
-    if (unlikely(!ring)) {
-        /* Drop. */
-        return size;
-    }
-
-    if (unlikely(size > ring->nr_buf_size)) {
-        RD(5, "[netmap_receive] drop packet of size %d > %d\n",
-                                    (int)size, ring->nr_buf_size);
-        return size;
-    }
-
-    if (nm_ring_empty(ring)) {
-        /* No available slots in the netmap TX ring. */
-        netmap_write_poll(s, true);
-        return 0;
-    }
-
-    i = ring->cur;
-    idx = ring->slot[i].buf_idx;
-    dst = (uint8_t *)NETMAP_BUF(ring, idx);
-
-    ring->slot[i].len = size;
-    ring->slot[i].flags = 0;
-    pkt_copy(buf, dst, size);
-    ring->cur = ring->head = nm_ring_next(ring, i);
-    ioctl(s->nmd->fd, NIOCTXSYNC, NULL);
-
-    return size;
-}
-
 static ssize_t netmap_receive_iov(NetClientState *nc,
                     const struct iovec *iov, int iovcnt)
 {
     NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
     struct netmap_ring *ring = s->tx;
+    unsigned int tail = ring->tail;
+    ssize_t totlen = 0;
     uint32_t last;
     uint32_t idx;
     uint8_t *dst;
     int j;
     uint32_t i;
 
-    if (unlikely(!ring)) {
-        /* Drop the packet. */
-        return iov_size(iov, iovcnt);
-    }
-
-    last = i = ring->cur;
+    last = i = ring->head;
 
     if (nm_ring_space(ring) < iovcnt) {
-        /* Not enough netmap slots. */
+        /* Not enough netmap slots. Tell the kernel that we have seen the
+         * newly available slots (so that it notifies us again when more
+         * become available), but without publishing any new slots to be
+         * processed (i.e., we don't advance ring->head). */
+        ring->cur = tail;
         netmap_write_poll(s, true);
         return 0;
     }
@@ -222,14 +184,17 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
         int offset = 0;
         int nm_frag_size;
 
+        totlen += iov_frag_size;
+
         /* Split each iovec fragment over more netmap slots, if
            necessary. */
         while (iov_frag_size) {
             nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);
 
-            if (unlikely(nm_ring_empty(ring))) {
-                /* We run out of netmap slots while splitting the
+            if (unlikely(i == tail)) {
+                /* We ran out of netmap slots while splitting the
                    iovec fragments. */
+                ring->cur = tail;
                 netmap_write_poll(s, true);
                 return 0;
             }
@@ -251,12 +216,24 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
     /* The last slot must not have NS_MOREFRAG set. */
     ring->slot[last].flags &= ~NS_MOREFRAG;
 
-    /* Now update ring->cur and ring->head. */
-    ring->cur = ring->head = i;
+    /* Now update ring->head and ring->cur to publish the new slots and
+     * the new wakeup point. */
+    ring->head = ring->cur = i;
 
     ioctl(s->nmd->fd, NIOCTXSYNC, NULL);
 
-    return iov_size(iov, iovcnt);
+    return totlen;
+}
+
+static ssize_t netmap_receive(NetClientState *nc,
+      const uint8_t *buf, size_t size)
+{
+    struct iovec iov;
+
+    iov.iov_base = (void *)buf;
+    iov.iov_len = size;
+
+    return netmap_receive_iov(nc, &iov, 1);
 }
 
 /* Complete a previous send (backend --> guest) and enable the
@@ -272,39 +249,46 @@ static void netmap_send(void *opaque)
 {
     NetmapState *s = opaque;
     struct netmap_ring *ring = s->rx;
+    unsigned int tail = ring->tail;
 
-    /* Keep sending while there are available packets into the netmap
+    /* Keep sending while there are available slots in the netmap
        RX ring and the forwarding path towards the peer is open. */
-    while (!nm_ring_empty(ring)) {
-        uint32_t i;
+    while (ring->head != tail) {
+        uint32_t i = ring->head;
         uint32_t idx;
         bool morefrag;
         int iovcnt = 0;
         int iovsize;
 
+        /* Get a (possibly multi-slot) packet. */
         do {
-            i = ring->cur;
             idx = ring->slot[i].buf_idx;
             morefrag = (ring->slot[i].flags & NS_MOREFRAG);
-            s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx);
+            s->iov[iovcnt].iov_base = (void *)NETMAP_BUF(ring, idx);
             s->iov[iovcnt].iov_len = ring->slot[i].len;
             iovcnt++;
+            i = nm_ring_next(ring, i);
+        } while (i != tail && morefrag);
 
-            ring->cur = ring->head = nm_ring_next(ring, i);
-        } while (!nm_ring_empty(ring) && morefrag);
+        /* Advance ring->cur to tell the kernel that we have seen the slots. */
+        ring->cur = i;
 
-        if (unlikely(nm_ring_empty(ring) && morefrag)) {
-            RD(5, "[netmap_send] ran out of slots, with a pending"
-                   "incomplete packet\n");
+        if (unlikely(morefrag)) {
+            /* This is a truncated packet: stop here, without advancing
+             * ring->head to release the incomplete slots; we will hopefully
+             * re-read the complete packet the next time we are called. */
+            break;
         }
 
         iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt,
                                             netmap_send_completed);
 
+        /* Release the slots to the kernel. */
+        ring->head = i;
+
         if (iovsize == 0) {
             /* The peer does not receive anymore. Packet is queued, stop
-             * reading from the backend until netmap_send_completed()
-             */
+             * reading from the backend until netmap_send_completed(). */
             netmap_read_poll(s, false);
             break;
         }
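
The netmap.c changes above all revolve around the distinct roles of the ring's head, cur and tail indices: advancing head publishes or releases slots to the kernel, while advancing cur only records how far userspace has looked, and therefore where the kernel should next wake it up. The toy model below uses invented types (not the netmap API) and is only meant to illustrate those two moves:

#include <stdio.h>

#define RING_SIZE 8

/* Toy stand-in for struct netmap_ring -- field roles only, not the real API. */
struct toy_ring {
    unsigned head;  /* first slot still owned by userspace                  */
    unsigned cur;   /* wakeup point: kernel notifies us once tail passes it */
    unsigned tail;  /* first slot owned by the kernel                       */
};

static unsigned ring_next(unsigned i)
{
    return (i + 1) % RING_SIZE;
}

int main(void)
{
    struct toy_ring ring = { .head = 0, .cur = 0, .tail = 5 };
    unsigned i = ring.head;

    /* Use two slots for a multi-fragment packet, then publish them:
     * advancing head is what actually hands the slots to the kernel. */
    i = ring_next(i);
    i = ring_next(i);
    ring.cur = i;   /* "seen up to here"                   */
    ring.head = i;  /* release the two slots to the kernel */
    printf("published: head=%u cur=%u tail=%u\n", ring.head, ring.cur, ring.tail);

    /* Not enough free slots for the next packet: advance only cur, so the
     * kernel will wake us when more slots appear, without publishing
     * anything new (head stays put). */
    ring.cur = ring.tail;
    printf("waiting:   head=%u cur=%u tail=%u\n", ring.head, ring.cur, ring.tail);
    return 0;
}
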
diff --git a/net/trace-events b/net/trace-events
index 7b594cfdd2..3417ac05b0 100644
--- a/net/trace-events
+++ b/net/trace-events
@@ -1,5 +1,8 @@
 # See docs/devel/tracing.txt for syntax documentation.
 
+# net/announce.c
+qemu_announce_self_iter(const char *mac) "%s"
+
 # net/vhost-user.c
 vhost_user_event(const char *chr, int event) "chr: %s got event: %d"
 
diff --git a/net/vhost-user-stub.c b/net/vhost-user-stub.c
new file mode 100644
index 0000000000..52ab4e13f1
--- /dev/null
+++ b/net/vhost-user-stub.c
@@ -0,0 +1,23 @@
+/*
+ * vhost-user-stub.c
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-user.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+int net_init_vhost_user(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
+{
+    error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
+    return -1;
+}
diff --git a/net/vhost-user.c b/net/vhost-user.c
index a39f9c9974..cd9659df87 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -172,6 +172,17 @@ static void net_vhost_user_cleanup(NetClientState *nc)
     qemu_purge_queued_packets(nc);
 }
 
+static int vhost_user_set_vnet_endianness(NetClientState *nc,
+                                          bool enable)
+{
+    /* Nothing to do.  If the server supports
+     * VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, it will get the
+     * vnet header endianness from there.  If it doesn't, negotiation
+     * fails.
+     */
+    return 0;
+}
+
 static bool vhost_user_has_vnet_hdr(NetClientState *nc)
 {
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
@@ -193,6 +204,8 @@ static NetClientInfo net_vhost_user_info = {
         .cleanup = net_vhost_user_cleanup,
         .has_vnet_hdr = vhost_user_has_vnet_hdr,
         .has_ufo = vhost_user_has_ufo,
+        .set_vnet_be = vhost_user_set_vnet_endianness,
+        .set_vnet_le = vhost_user_set_vnet_endianness,
 };
 
 static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
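
Why install hooks that do nothing? Callers probe set_vnet_le/set_vnet_be through the NetClientInfo table and treat a missing hook as "endianness fix-up unsupported", so a no-op lets cross-endian setups proceed and defers the real work to the vhost-user backend. The sketch below shows that dispatch pattern with simplified stand-in types (ToyNetClient, toy_set_vnet_le) invented for illustration; it is not QEMU's actual code:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct ToyNetClient ToyNetClient;

typedef struct {
    int (*set_vnet_le)(ToyNetClient *nc, bool enable);
} ToyNetClientInfo;

struct ToyNetClient {
    const ToyNetClientInfo *info;
};

/* Dispatcher in the style of qemu_set_vnet_le(): no hook means "unsupported". */
static int toy_set_vnet_le(ToyNetClient *nc, bool enable)
{
    if (!nc->info->set_vnet_le) {
        return -ENOSYS;
    }
    return nc->info->set_vnet_le(nc, enable);
}

/* No-op hook, in the spirit of vhost_user_set_vnet_endianness() above. */
static int noop_set_vnet_le(ToyNetClient *nc, bool enable)
{
    (void)nc;
    (void)enable;
    return 0;
}

int main(void)
{
    static const ToyNetClientInfo with_hook = { .set_vnet_le = noop_set_vnet_le };
    static const ToyNetClientInfo without_hook = { NULL };
    ToyNetClient a = { .info = &with_hook };
    ToyNetClient b = { .info = &without_hook };

    printf("with no-op hook: %d (success), without hook: %d (-ENOSYS)\n",
           toy_set_vnet_le(&a, true), toy_set_vnet_le(&b, true));
    return 0;
}
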