summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2010-09-08 14:26:14 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2010-09-08 14:26:14 -0500
commitdccbe6fbab47c9a2589f436e0592933b47cbe40b (patch)
treee5a851b48a3801dd28282eb17533975cade7ac23
parent630c26893d6dc7713c0fcfc3c09d6bfe536a6ce3 (diff)
parenta697a334b3c4d3250e6420f5d38550ea10eb5319 (diff)
downloadfocaccia-qemu-dccbe6fbab47c9a2589f436e0592933b47cbe40b.tar.gz
focaccia-qemu-dccbe6fbab47c9a2589f436e0592933b47cbe40b.zip
Merge remote branch 'mst/for_anthony' into staging
-rw-r--r--hw/e1000.c11
-rw-r--r--hw/s390-virtio-bus.c8
-rw-r--r--hw/s390-virtio-bus.h1
-rw-r--r--hw/syborg_virtio.c8
-rw-r--r--hw/vhost_net.c24
-rw-r--r--hw/virtio-net.c129
-rw-r--r--hw/virtio-net.h14
-rw-r--r--hw/virtio-pci.c8
-rw-r--r--hw/virtio.h4
-rw-r--r--net/tap-aix.c9
-rw-r--r--net/tap-bsd.c9
-rw-r--r--net/tap-linux.c29
-rw-r--r--net/tap-linux.h8
-rw-r--r--net/tap-solaris.c9
-rw-r--r--net/tap-win32.c9
-rw-r--r--net/tap.c49
-rw-r--r--net/tap.h4
17 files changed, 287 insertions, 46 deletions
diff --git a/hw/e1000.c b/hw/e1000.c
index 80b78bc618..7d7d14002f 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -345,7 +345,7 @@ is_vlan_txd(uint32_t txd_lower)
 
 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
  * fill it in, just pad descriptor length by 4 bytes unless guest
- * told us to trip it off the packet. */
+ * told us to strip it off the packet. */
 static inline int
 fcs_len(E1000State *s)
 {
@@ -690,9 +690,14 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
 
     s->mac_reg[GPRC]++;
     s->mac_reg[TPR]++;
-    n = s->mac_reg[TORL];
-    if ((s->mac_reg[TORL] += size) < n)
+    /* TOR - Total Octets Received:
+     * This register includes bytes received in a packet from the <Destination
+     * Address> field through the <CRC> field, inclusively.
+     */
+    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
+    if (n < s->mac_reg[TORL])
         s->mac_reg[TORH]++;
+    s->mac_reg[TORL] = n;
 
     n = E1000_ICS_RXT0;
     if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index fe6884d47d..784dc01b97 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -27,6 +27,7 @@
 #include "elf.h"
 #include "hw/virtio.h"
 #include "hw/virtio-serial.h"
+#include "hw/virtio-net.h"
 #include "hw/sysbus.h"
 #include "kvm.h"
 
@@ -110,7 +111,7 @@ static int s390_virtio_net_init(VirtIOS390Device *dev)
 {
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init((DeviceState *)dev, &dev->nic);
+    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net);
     if (!vdev) {
         return -1;
     }
@@ -327,6 +328,11 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
     .qdev.size = sizeof(VirtIOS390Device),
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
+        DEFINE_PROP_UINT32("x-txtimer", VirtIOS390Device,
+                           net.txtimer, TX_TIMER_INTERVAL),
+        DEFINE_PROP_INT32("x-txburst", VirtIOS390Device,
+                          net.txburst, TX_BURST),
+        DEFINE_PROP_STRING("tx", VirtIOS390Device, net.tx),
         DEFINE_PROP_END_OF_LIST(),
     },
 };
diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
index 333fea8963..41558c9c67 100644
--- a/hw/s390-virtio-bus.h
+++ b/hw/s390-virtio-bus.h
@@ -43,6 +43,7 @@ typedef struct VirtIOS390Device {
     uint32_t host_features;
     /* Max. number of ports we can have for a the virtio-serial device */
     uint32_t max_virtserial_ports;
+    virtio_net_conf net;
 } VirtIOS390Device;
 
 typedef struct VirtIOS390Bus {
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index abf0370107..4dfd1a87b9 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -68,6 +68,7 @@ typedef struct {
     uint32_t id;
     NICConf nic;
     uint32_t host_features;
+    virtio_net_conf net;
 } SyborgVirtIOProxy;
 
 static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
@@ -284,7 +285,7 @@ static int syborg_virtio_net_init(SysBusDevice *dev)
     VirtIODevice *vdev;
     SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
 
-    vdev = virtio_net_init(&dev->qdev, &proxy->nic);
+    vdev = virtio_net_init(&dev->qdev, &proxy->nic, &proxy->net);
     return syborg_virtio_init(proxy, vdev);
 }
 
@@ -295,6 +296,11 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
         DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
+        DEFINE_PROP_UINT32("x-txtimer", SyborgVirtIOProxy,
+                           net.txtimer, TX_TIMER_INTERVAL),
+        DEFINE_PROP_INT32("x-txburst", SyborgVirtIOProxy,
+                          net.txburst, TX_BURST),
+        DEFINE_PROP_STRING("tx", SyborgVirtIOProxy, net.tx),
         DEFINE_PROP_END_OF_LIST(),
     }
 };
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index 0c00de272a..4a7b8194f2 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -50,7 +50,9 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
     if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) {
         features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
     }
-    features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+    if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) {
+        features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+    }
     return features;
 }
 
@@ -63,6 +65,9 @@ void vhost_net_ack_features(struct vhost_net *net, unsigned features)
     if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
         net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
     }
+    if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+        net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
+    }
 }
 
 static int vhost_net_get_fd(VLANClientState *backend)
@@ -97,6 +102,10 @@ struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd)
     if (r < 0) {
         goto fail;
     }
+    if (!tap_has_vnet_hdr_len(backend,
+                              sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+        net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+    }
     if (~net->dev.features & net->dev.backend_features) {
         fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
                 (uint64_t)(~net->dev.features & net->dev.backend_features));
@@ -117,6 +126,10 @@ int vhost_net_start(struct vhost_net *net,
 {
     struct vhost_vring_file file = { };
     int r;
+    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+        tap_set_vnet_hdr_len(net->vc,
+                             sizeof(struct virtio_net_hdr_mrg_rxbuf));
+    }
 
     net->dev.nvqs = 2;
     net->dev.vqs = net->vqs;
@@ -144,6 +157,9 @@ fail:
     }
     net->vc->info->poll(net->vc, true);
     vhost_dev_stop(&net->dev, dev);
+    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+    }
     return r;
 }
 
@@ -158,11 +174,17 @@ void vhost_net_stop(struct vhost_net *net,
     }
     net->vc->info->poll(net->vc, true);
     vhost_dev_stop(&net->dev, dev);
+    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+    }
 }
 
 void vhost_net_cleanup(struct vhost_net *net)
 {
     vhost_dev_cleanup(&net->dev);
+    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+    }
     qemu_free(net);
 }
 #else
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 075f72df2d..0a9cae22c4 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -36,7 +36,10 @@ typedef struct VirtIONet
     VirtQueue *ctrl_vq;
     NICState *nic;
     QEMUTimer *tx_timer;
-    int tx_timer_active;
+    QEMUBH *tx_bh;
+    uint32_t tx_timeout;
+    int32_t tx_burst;
+    int tx_waiting;
     uint32_t has_vnet_hdr;
     uint8_t has_ufo;
     struct {
@@ -619,7 +622,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
     return size;
 }
 
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
 
 static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
 {
@@ -635,16 +638,18 @@ static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
 }
 
 /* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 {
     VirtQueueElement elem;
+    int32_t num_packets = 0;
 
-    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
-        return;
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return num_packets;
+    }
 
     if (n->async_tx.elem.out_num) {
         virtio_queue_set_notification(n->tx_vq, 0);
-        return;
+        return num_packets;
     }
 
     while (virtqueue_pop(vq, &elem)) {
@@ -681,38 +686,55 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
             virtio_queue_set_notification(n->tx_vq, 0);
             n->async_tx.elem = elem;
             n->async_tx.len  = len;
-            return;
+            return -EBUSY;
         }
 
         len += ret;
 
         virtqueue_push(vq, &elem, len);
         virtio_notify(&n->vdev, vq);
+
+        if (++num_packets >= n->tx_burst) {
+            break;
+        }
     }
+    return num_packets;
 }
 
-static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
-    if (n->tx_timer_active) {
+    if (n->tx_waiting) {
         virtio_queue_set_notification(vq, 1);
         qemu_del_timer(n->tx_timer);
-        n->tx_timer_active = 0;
+        n->tx_waiting = 0;
         virtio_net_flush_tx(n, vq);
     } else {
         qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
-        n->tx_timer_active = 1;
+                       qemu_get_clock(vm_clock) + n->tx_timeout);
+        n->tx_waiting = 1;
         virtio_queue_set_notification(vq, 0);
     }
 }
 
+static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+
+    if (unlikely(n->tx_waiting)) {
+        return;
+    }
+    virtio_queue_set_notification(vq, 0);
+    qemu_bh_schedule(n->tx_bh);
+    n->tx_waiting = 1;
+}
+
 static void virtio_net_tx_timer(void *opaque)
 {
     VirtIONet *n = opaque;
 
-    n->tx_timer_active = 0;
+    n->tx_waiting = 0;
 
     /* Just in case the driver is not ready on more */
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
@@ -722,6 +744,41 @@ static void virtio_net_tx_timer(void *opaque)
     virtio_net_flush_tx(n, n->tx_vq);
 }
 
+static void virtio_net_tx_bh(void *opaque)
+{
+    VirtIONet *n = opaque;
+    int32_t ret;
+
+    n->tx_waiting = 0;
+
+    /* Just in case the driver is not ready on more */
+    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
+        return;
+
+    ret = virtio_net_flush_tx(n, n->tx_vq);
+    if (ret == -EBUSY) {
+        return; /* Notification re-enable handled by tx_complete */
+    }
+
+    /* If we flush a full burst of packets, assume there are
+     * more coming and immediately reschedule */
+    if (ret >= n->tx_burst) {
+        qemu_bh_schedule(n->tx_bh);
+        n->tx_waiting = 1;
+        return;
+    }
+
+    /* If less than a full burst, re-enable notification and flush
+     * anything that may have come in while we weren't looking.  If
+     * we find something, assume the guest is still active and reschedule */
+    virtio_queue_set_notification(n->tx_vq, 1);
+    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
+        virtio_queue_set_notification(n->tx_vq, 0);
+        qemu_bh_schedule(n->tx_bh);
+        n->tx_waiting = 1;
+    }
+}
+
 static void virtio_net_save(QEMUFile *f, void *opaque)
 {
     VirtIONet *n = opaque;
@@ -735,7 +792,7 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
     virtio_save(&n->vdev, f);
 
     qemu_put_buffer(f, n->mac, ETH_ALEN);
-    qemu_put_be32(f, n->tx_timer_active);
+    qemu_put_be32(f, n->tx_waiting);
     qemu_put_be32(f, n->mergeable_rx_bufs);
     qemu_put_be16(f, n->status);
     qemu_put_byte(f, n->promisc);
@@ -764,7 +821,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     virtio_load(&n->vdev, f);
 
     qemu_get_buffer(f, n->mac, ETH_ALEN);
-    n->tx_timer_active = qemu_get_be32(f);
+    n->tx_waiting = qemu_get_be32(f);
     n->mergeable_rx_bufs = qemu_get_be32(f);
 
     if (version_id >= 3)
@@ -840,9 +897,13 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     }
     n->mac_table.first_multi = i;
 
-    if (n->tx_timer_active) {
-        qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
+    if (n->tx_waiting) {
+        if (n->tx_timer) {
+            qemu_mod_timer(n->tx_timer,
+                           qemu_get_clock(vm_clock) + n->tx_timeout);
+        } else {
+            qemu_bh_schedule(n->tx_bh);
+        }
     }
     return 0;
 }
@@ -903,7 +964,8 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason)
     virtio_net_set_status(&n->vdev, n->vdev.status & status);
 }
 
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+                              virtio_net_conf *net)
 {
     VirtIONet *n;
 
@@ -919,7 +981,22 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
     n->vdev.reset = virtio_net_reset;
     n->vdev.set_status = virtio_net_set_status;
     n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
-    n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
+
+    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
+        fprintf(stderr, "virtio-net: "
+                "Unknown option tx=%s, valid options: \"timer\" \"bh\"\n",
+                net->tx);
+        fprintf(stderr, "Defaulting to \"bh\"\n");
+    }
+
+    if (net->tx && !strcmp(net->tx, "timer")) {
+        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
+        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
+        n->tx_timeout = net->txtimer;
+    } else {
+        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
+        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
+    }
     n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
     qemu_macaddr_default_if_unset(&conf->macaddr);
     memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
@@ -929,8 +1006,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
 
     qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
 
-    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
-    n->tx_timer_active = 0;
+    n->tx_waiting = 0;
+    n->tx_burst = net->txburst;
     n->mergeable_rx_bufs = 0;
     n->promisc = 1; /* for compatibility */
 
@@ -962,8 +1039,12 @@ void virtio_net_exit(VirtIODevice *vdev)
     qemu_free(n->mac_table.macs);
     qemu_free(n->vlans);
 
-    qemu_del_timer(n->tx_timer);
-    qemu_free_timer(n->tx_timer);
+    if (n->tx_timer) {
+        qemu_del_timer(n->tx_timer);
+        qemu_free_timer(n->tx_timer);
+    } else {
+        qemu_bh_delete(n->tx_bh);
+    }
 
     virtio_cleanup(&n->vdev);
     qemu_del_vlan_client(&n->nic->nc);
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 235f1a9fa8..8af9a1ce55 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -49,6 +49,20 @@
 
 #define TX_TIMER_INTERVAL 150000 /* 150 us */
 
+/* Limit the number of packets that can be sent via a single flush
+ * of the TX queue.  This gives us a guaranteed exit condition and
+ * ensures fairness in the io path.  256 conveniently matches the
+ * length of the TX queue and shows a good balance of performance
+ * and latency. */
+#define TX_BURST 256
+
+typedef struct virtio_net_conf
+{
+    uint32_t txtimer;
+    int32_t txburst;
+    char *tx;
+} virtio_net_conf;
+
 /* Maximum packet size we can receive from tap device: header + 64k */
 #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6e8f88a141..86e6b0a561 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -107,6 +107,7 @@ typedef struct {
 #endif
     /* Max. number of ports we can have for a the virtio-serial device */
     uint32_t max_virtserial_ports;
+    virtio_net_conf net;
 } VirtIOPCIProxy;
 
 /* virtio device */
@@ -613,7 +614,7 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic);
+    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, &proxy->net);
 
     vdev->nvectors = proxy->nvectors;
     virtio_init_pci(proxy, vdev,
@@ -690,6 +691,11 @@ static PCIDeviceInfo virtio_info[] = {
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
+            DEFINE_PROP_UINT32("x-txtimer", VirtIOPCIProxy,
+                               net.txtimer, TX_TIMER_INTERVAL),
+            DEFINE_PROP_INT32("x-txburst", VirtIOPCIProxy,
+                              net.txburst, TX_BURST),
+            DEFINE_PROP_STRING("tx", VirtIOPCIProxy, net.tx),
             DEFINE_PROP_END_OF_LIST(),
         },
         .qdev.reset = virtio_pci_reset,
diff --git a/hw/virtio.h b/hw/virtio.h
index 5836ab61e7..a60d7355de 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -185,7 +185,9 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
 
 /* Base devices.  */
 VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf);
+struct virtio_net_conf;
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+                              struct virtio_net_conf *net);
 VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
 VirtIODevice *virtio_balloon_init(DeviceState *dev);
 #ifdef CONFIG_LINUX
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 4bc9f2f6d2..e19aaba110 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -46,6 +46,15 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 3773d5da5e..3513075337 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -116,6 +116,15 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-linux.c b/net/tap-linux.c
index c92983cb53..f7aa9041b8 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -129,6 +129,35 @@ int tap_probe_has_ufo(int fd)
     return 1;
 }
 
+/* Verify that we can assign given length */
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    int orig;
+    if (ioctl(fd, TUNGETVNETHDRSZ, &orig) == -1) {
+        return 0;
+    }
+    if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) {
+        return 0;
+    }
+    /* Restore original length: we can't handle failure. */
+    if (ioctl(fd, TUNSETVNETHDRSZ, &orig) == -1) {
+        fprintf(stderr, "TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n",
+                strerror(errno));
+        assert(0);
+        return -errno;
+    }
+    return 1;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+    if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) {
+        fprintf(stderr, "TUNSETVNETHDRSZ ioctl() failed: %s. Exiting.\n",
+                strerror(errno));
+        assert(0);
+    }
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 9f943589bf..659e98122b 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -27,6 +27,8 @@
 #define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
 #define TUNGETIFF      _IOR('T', 210, unsigned int)
 #define TUNSETSNDBUF   _IOW('T', 212, int)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
 
 #endif
 
@@ -52,4 +54,10 @@ struct virtio_net_hdr
     uint16_t csum_offset;
 };
 
+struct virtio_net_hdr_mrg_rxbuf
+{
+    struct virtio_net_hdr hdr;
+    uint16_t num_buffers;   /* Number of merged rx buffers */
+};
+
 #endif /* QEMU_TAP_H */
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 4ac61d2ad6..c216d28267 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -212,6 +212,15 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 74348daf5a..9fe4fcd5f4 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -728,6 +728,15 @@ int tap_has_vnet_hdr(VLANClientState *vc)
     return 0;
 }
 
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
 void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
 {
 }
diff --git a/net/tap.c b/net/tap.c
index 0147dabc15..4afb314fde 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -57,10 +57,10 @@ typedef struct TAPState {
     uint8_t buf[TAP_BUFSIZE];
     unsigned int read_poll : 1;
     unsigned int write_poll : 1;
-    unsigned int has_vnet_hdr : 1;
     unsigned int using_vnet_hdr : 1;
     unsigned int has_ufo: 1;
     VHostNetState *vhost_net;
+    unsigned host_vnet_hdr_len;
 } TAPState;
 
 static int launch_script(const char *setup_script, const char *ifname, int fd);
@@ -121,11 +121,11 @@ static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     const struct iovec *iovp = iov;
     struct iovec iov_copy[iovcnt + 1];
-    struct virtio_net_hdr hdr = { 0, };
+    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 
-    if (s->has_vnet_hdr && !s->using_vnet_hdr) {
+    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
         iov_copy[0].iov_base = &hdr;
-        iov_copy[0].iov_len =  sizeof(hdr);
+        iov_copy[0].iov_len =  s->host_vnet_hdr_len;
         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
         iovp = iov_copy;
         iovcnt++;
@@ -139,11 +139,11 @@ static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t s
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     struct iovec iov[2];
     int iovcnt = 0;
-    struct virtio_net_hdr hdr = { 0, };
+    struct virtio_net_hdr_mrg_rxbuf hdr = { };
 
-    if (s->has_vnet_hdr) {
+    if (s->host_vnet_hdr_len) {
         iov[iovcnt].iov_base = &hdr;
-        iov[iovcnt].iov_len  = sizeof(hdr);
+        iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
         iovcnt++;
     }
 
@@ -159,7 +159,7 @@ static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     struct iovec iov[1];
 
-    if (s->has_vnet_hdr && !s->using_vnet_hdr) {
+    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
         return tap_receive_raw(nc, buf, size);
     }
 
@@ -202,9 +202,9 @@ static void tap_send(void *opaque)
             break;
         }
 
-        if (s->has_vnet_hdr && !s->using_vnet_hdr) {
-            buf  += sizeof(struct virtio_net_hdr);
-            size -= sizeof(struct virtio_net_hdr);
+        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+            buf  += s->host_vnet_hdr_len;
+            size -= s->host_vnet_hdr_len;
         }
 
         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
@@ -229,7 +229,28 @@ int tap_has_vnet_hdr(VLANClientState *nc)
 
     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
 
-    return s->has_vnet_hdr;
+    return !!s->host_vnet_hdr_len;
+}
+
+int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+
+    return tap_probe_vnet_hdr_len(s->fd, len);
+}
+
+void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
+           len == sizeof(struct virtio_net_hdr));
+
+    tap_fd_set_vnet_hdr_len(s->fd, len);
+    s->host_vnet_hdr_len = len;
 }
 
 void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
@@ -239,7 +260,7 @@ void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
     using_vnet_hdr = using_vnet_hdr != 0;
 
     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
-    assert(s->has_vnet_hdr == using_vnet_hdr);
+    assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 
     s->using_vnet_hdr = using_vnet_hdr;
 }
@@ -310,7 +331,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan,
     s = DO_UPCAST(TAPState, nc, nc);
 
     s->fd = fd;
-    s->has_vnet_hdr = vnet_hdr != 0;
+    s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
     s->using_vnet_hdr = 0;
     s->has_ufo = tap_probe_has_ufo(s->fd);
     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
diff --git a/net/tap.h b/net/tap.h
index b8cec8320c..e44bd2bc5f 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -40,13 +40,17 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
 int tap_has_ufo(VLANClientState *vc);
 int tap_has_vnet_hdr(VLANClientState *vc);
+int tap_has_vnet_hdr_len(VLANClientState *vc, int len);
 void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr);
 void tap_set_offload(VLANClientState *vc, int csum, int tso4, int tso6, int ecn, int ufo);
+void tap_set_vnet_hdr_len(VLANClientState *vc, int len);
 
 int tap_set_sndbuf(int fd, QemuOpts *opts);
 int tap_probe_vnet_hdr(int fd);
+int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
+void tap_fd_set_vnet_hdr_len(int fd, int len);
 
 int tap_get_fd(VLANClientState *vc);