From 605d52e62fc94ec28408890fc682d4bdd7d8a36c Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:39 +0300 Subject: net_pkt: Name vmxnet3 packet abstractions more generic This patch drops "vmx" prefix from packet abstractions names to emphasize the fact they are generic and not tied to any specific network device. These abstractions will be reused by e1000e emulation implementation introduced by following patches so their names need generalization. This patch (except renamed files, adjusted comments and changes in MAINTAINTERS) was produced by: git grep -lz 'vmxnet_tx_pkt' | xargs -0 perl -i'' -pE "s/vmxnet_tx_pkt/net_tx_pkt/g" git grep -lz 'vmxnet_rx_pkt' | xargs -0 perl -i'' -pE "s/vmxnet_rx_pkt/net_rx_pkt/g" git grep -lz 'VmxnetTxPkt' | xargs -0 perl -i'' -pE "s/VmxnetTxPkt/NetTxPkt/g" git grep -lz 'VMXNET_TX_PKT' | xargs -0 perl -i'' -pE "s/VMXNET_TX_PKT/NET_TX_PKT/g" git grep -lz 'VmxnetRxPkt' | xargs -0 perl -i'' -pE "s/VmxnetRxPkt/NetRxPkt/g" git grep -lz 'VMXNET_RX_PKT' | xargs -0 perl -i'' -pE "s/VMXNET_RX_PKT/NET_RX_PKT/g" sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_rx_pkt.c sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_tx_pkt.c Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/net_tx_pkt.c | 581 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 581 insertions(+) create mode 100644 hw/net/net_tx_pkt.c (limited to 'hw/net/net_tx_pkt.c') diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c new file mode 100644 index 0000000000..94c7e3d3f6 --- /dev/null +++ b/hw/net/net_tx_pkt.c @@ -0,0 +1,581 @@ +/* + * QEMU TX packets abstractions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman + * Tamir Shomer + * Yan Vugenfirer + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "net_tx_pkt.h" +#include "net/eth.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "net/checksum.h" +#include "net/tap.h" +#include "net/net.h" + +enum { + NET_TX_PKT_VHDR_FRAG = 0, + NET_TX_PKT_L2HDR_FRAG, + NET_TX_PKT_L3HDR_FRAG, + NET_TX_PKT_PL_START_FRAG +}; + +/* TX packet private context */ +struct NetTxPkt { + struct virtio_net_hdr virt_hdr; + bool has_virt_hdr; + + struct iovec *raw; + uint32_t raw_frags; + uint32_t max_raw_frags; + + struct iovec *vec; + + uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + + uint32_t payload_len; + + uint32_t payload_frags; + uint32_t max_payload_frags; + + uint16_t hdr_len; + eth_pkt_types_e packet_type; + uint8_t l4proto; +}; + +void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, + bool has_virt_hdr) +{ + struct NetTxPkt *p = g_malloc0(sizeof *p); + + p->vec = g_malloc((sizeof *p->vec) * + (max_frags + NET_TX_PKT_PL_START_FRAG)); + + p->raw = g_malloc((sizeof *p->raw) * max_frags); + + p->max_payload_frags = max_frags; + p->max_raw_frags = max_frags; + p->has_virt_hdr = has_virt_hdr; + p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; + p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = + p->has_virt_hdr ? sizeof p->virt_hdr : 0; + p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_len = 0; + + *pkt = p; +} + +void net_tx_pkt_uninit(struct NetTxPkt *pkt) +{ + if (pkt) { + g_free(pkt->vec); + g_free(pkt->raw); + g_free(pkt); + } +} + +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +{ + uint16_t csum; + uint32_t ph_raw_csum; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + struct ip_header *ip_hdr; + + if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && + VIRTIO_NET_HDR_GSO_UDP != gso_type) { + return; + } + + ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + + if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > + ETH_MAX_IP_DGRAM_LEN) { + return; + } + + ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); + + /* Calculate IP header checksum */ + ip_hdr->ip_sum = 0; + csum = net_raw_checksum((uint8_t *)ip_hdr, + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); + ip_hdr->ip_sum = cpu_to_be16(csum); + + /* Calculate IP pseudo header checksum */ + ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); + csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); + iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); +} + +static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt) +{ + pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len + + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; +} + +static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) +{ + struct iovec *l2_hdr, *l3_hdr; + size_t bytes_read; + size_t full_ip6hdr_len; + uint16_t l3_proto; + + assert(pkt); + + l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; + l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG]; + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, + ETH_MAX_L2_HDR_LEN); + if (bytes_read < sizeof(struct eth_header)) { + l2_hdr->iov_len = 0; + return false; + } + + l2_hdr->iov_len = sizeof(struct eth_header); + switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) { + case ETH_P_VLAN: + l2_hdr->iov_len += sizeof(struct vlan_header); + break; + case ETH_P_DVLAN: + l2_hdr->iov_len += 2 * sizeof(struct vlan_header); + break; + } + + if (bytes_read < l2_hdr->iov_len) { + l2_hdr->iov_len = 0; + return false; + } + + l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + + switch (l3_proto) { + case ETH_P_IP: + l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + l3_hdr->iov_base, sizeof(struct ip_header)); + + if (bytes_read < sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + + l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + /* copy optional IPv4 header data */ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + break; + + case ETH_P_IPV6: + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + &pkt->l4proto, &full_ip6hdr_len)) { + l3_hdr->iov_len = 0; + return false; + } + + l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + l3_hdr->iov_base, full_ip6hdr_len); + + if (bytes_read < full_ip6hdr_len) { + l3_hdr->iov_len = 0; + return false; + } else { + l3_hdr->iov_len = full_ip6hdr_len; + } + break; + + default: + l3_hdr->iov_len = 0; + break; + } + + net_tx_pkt_calculate_hdr_len(pkt); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); + return true; +} + +static bool net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) +{ + size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; + + pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], + pkt->max_payload_frags, + pkt->raw, pkt->raw_frags, + pkt->hdr_len, payload_len); + + if (pkt->payload_frags != (uint32_t) -1) { + pkt->payload_len = payload_len; + return true; + } else { + return false; + } +} + +bool net_tx_pkt_parse(struct NetTxPkt *pkt) +{ + return net_tx_pkt_parse_headers(pkt) && + net_tx_pkt_rebuild_payload(pkt); +} + +struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) +{ + assert(pkt); + return &pkt->virt_hdr; +} + +static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, + bool tso_enable) +{ + uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; + uint16_t l3_proto; + + l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); + + if (!tso_enable) { + goto func_exit; + } + + rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, + pkt->l4proto); + +func_exit: + return rc; +} + +void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, + bool csum_enable, uint32_t gso_size) +{ + struct tcp_hdr l4hdr; + assert(pkt); + + /* csum has to be enabled if tso is. */ + assert(csum_enable || !tso_enable); + + pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable); + + switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_NONE: + pkt->virt_hdr.hdr_len = 0; + pkt->virt_hdr.gso_size = 0; + break; + + case VIRTIO_NET_HDR_GSO_UDP: + pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); + break; + + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + 0, &l4hdr, sizeof(l4hdr)); + pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); + pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + break; + + default: + g_assert_not_reached(); + } + + if (csum_enable) { + switch (pkt->l4proto) { + case IP_PROTO_TCP: + pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + pkt->virt_hdr.csum_start = pkt->hdr_len; + pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum); + break; + case IP_PROTO_UDP: + pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + pkt->virt_hdr.csum_start = pkt->hdr_len; + pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum); + break; + default: + break; + } + } +} + +void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +{ + bool is_new; + assert(pkt); + + eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, &is_new); + + /* update l2hdrlen */ + if (is_new) { + pkt->hdr_len += sizeof(struct vlan_header); + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len += + sizeof(struct vlan_header); + } +} + +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, + size_t len) +{ + hwaddr mapped_len = 0; + struct iovec *ventry; + assert(pkt); + assert(pkt->max_raw_frags > pkt->raw_frags); + + if (!len) { + return true; + } + + ventry = &pkt->raw[pkt->raw_frags]; + mapped_len = len; + + ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); + ventry->iov_len = mapped_len; + pkt->raw_frags += !!ventry->iov_base; + + if ((ventry->iov_base == NULL) || (len != mapped_len)) { + return false; + } + + return true; +} + +eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) +{ + assert(pkt); + + return pkt->packet_type; +} + +size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt) +{ + assert(pkt); + + return pkt->hdr_len + pkt->payload_len; +} + +void net_tx_pkt_dump(struct NetTxPkt *pkt) +{ +#ifdef NET_TX_PKT_DEBUG + assert(pkt); + + printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, " + "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type, + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len, + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len); +#endif +} + +void net_tx_pkt_reset(struct NetTxPkt *pkt) +{ + int i; + + /* no assert, as reset can be called before tx_pkt_init */ + if (!pkt) { + return; + } + + memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); + + g_free(pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base); + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; + + assert(pkt->vec); + for (i = NET_TX_PKT_L2HDR_FRAG; + i < pkt->payload_frags + NET_TX_PKT_PL_START_FRAG; i++) { + pkt->vec[i].iov_len = 0; + } + pkt->payload_len = 0; + pkt->payload_frags = 0; + + assert(pkt->raw); + for (i = 0; i < pkt->raw_frags; i++) { + assert(pkt->raw[i].iov_base); + cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, + false, pkt->raw[i].iov_len); + pkt->raw[i].iov_len = 0; + } + pkt->raw_frags = 0; + + pkt->hdr_len = 0; + pkt->packet_type = 0; + pkt->l4proto = 0; +} + +static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) +{ + struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; + uint32_t csum_cntr; + uint16_t csum = 0; + /* num of iovec without vhdr */ + uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; + uint16_t csl; + struct ip_header *iphdr; + size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; + + /* Put zero to checksum field */ + iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); + + /* Calculate L4 TCP/UDP checksum */ + csl = pkt->payload_len; + + /* data checksum */ + csum_cntr = + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); + /* add pseudo header to csum */ + iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + + /* Put the checksum obtained into the packet */ + csum = cpu_to_be16(net_checksum_finish(csum_cntr)); + iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); +} + +enum { + NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, + NET_TX_PKT_FRAGMENT_L3_HDR_POS, + NET_TX_PKT_FRAGMENT_HEADER_NUM +}; + +#define NET_MAX_FRAG_SG_LIST (64) + +static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, + int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx) +{ + size_t fetched = 0; + struct iovec *src = pkt->vec; + + *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; + + while (fetched < pkt->virt_hdr.gso_size) { + + /* no more place in fragment iov */ + if (*dst_idx == NET_MAX_FRAG_SG_LIST) { + break; + } + + /* no more data in iovec */ + if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) { + break; + } + + + dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; + dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, + pkt->virt_hdr.gso_size - fetched); + + *src_offset += dst[*dst_idx].iov_len; + fetched += dst[*dst_idx].iov_len; + + if (*src_offset == src[*src_idx].iov_len) { + *src_offset = 0; + (*src_idx)++; + } + + (*dst_idx)++; + } + + return fetched; +} + +static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, + NetClientState *nc) +{ + struct iovec fragment[NET_MAX_FRAG_SG_LIST]; + size_t fragment_len = 0; + bool more_frags = false; + + /* some pointers for shorter code */ + void *l2_iov_base, *l3_iov_base; + size_t l2_iov_len, l3_iov_len; + int src_idx = NET_TX_PKT_PL_START_FRAG, dst_idx; + size_t src_offset = 0; + size_t fragment_offset = 0; + + l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base; + l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len; + l3_iov_base = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; + + /* Copy headers */ + fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; + fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; + fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; + fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; + + + /* Put as much data as possible and send */ + do { + fragment_len = net_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, + fragment, &dst_idx); + + more_frags = (fragment_offset + fragment_len < pkt->payload_len); + + eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base, + l3_iov_len, fragment_len, fragment_offset, more_frags); + + eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); + + qemu_sendv_packet(nc, fragment, dst_idx); + + fragment_offset += fragment_len; + + } while (more_frags); + + return true; +} + +bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) +{ + assert(pkt); + + if (!pkt->has_virt_hdr && + pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + net_tx_pkt_do_sw_csum(pkt); + } + + /* + * Since underlying infrastructure does not support IP datagrams longer + * than 64K we should drop such packets and don't even try to send + */ + if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) { + if (pkt->payload_len > + ETH_MAX_IP_DGRAM_LEN - + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) { + return false; + } + } + + if (pkt->has_virt_hdr || + pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { + qemu_sendv_packet(nc, pkt->vec, + pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); + return true; + } + + return net_tx_pkt_do_sw_fragmentation(pkt, nc); +} -- cgit 1.4.1 From eb700029c7836798046191d62d595363d92c84d4 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:41 +0300 Subject: net_pkt: Extend packet abstraction as required by e1000e functionality This patch extends the TX/RX packet abstractions with features that will be used by the e1000e device implementation. Changes are: 1. Support iovec lists for RX buffers 2. Deeper RX packets parsing 3. Loopback option for TX packets 4. Extended VLAN headers handling 5. RSS processing for RX packets Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/net_rx_pkt.c | 473 +++++++++++++++++++++++++++++++++++++++++++++---- hw/net/net_rx_pkt.h | 193 +++++++++++++++++++- hw/net/net_tx_pkt.c | 204 +++++++++++++-------- hw/net/net_tx_pkt.h | 60 ++++++- include/net/checksum.h | 4 +- include/net/eth.h | 150 +++++++++++----- net/checksum.c | 7 +- net/eth.c | 410 +++++++++++++++++++++++++++++++++++++----- trace-events | 40 +++++ 9 files changed, 1336 insertions(+), 205 deletions(-) (limited to 'hw/net/net_tx_pkt.c') diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c index 8a4f29fe04..1019b50c18 100644 --- a/hw/net/net_rx_pkt.c +++ b/hw/net/net_rx_pkt.c @@ -16,24 +16,16 @@ */ #include "qemu/osdep.h" +#include "trace.h" #include "net_rx_pkt.h" -#include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" -/* - * RX packet may contain up to 2 fragments - rebuilt eth header - * in case of VLAN tag stripping - * and payload received from QEMU - in any case - */ -#define NET_MAX_RX_PACKET_FRAGMENTS (2) - struct NetRxPkt { struct virtio_net_hdr virt_hdr; - uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; - struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS]; + uint8_t ehdr_buf[sizeof(struct eth_header)]; + struct iovec *vec; + uint16_t vec_len_total; uint16_t vec_len; uint32_t tot_len; uint16_t tci; @@ -46,17 +38,31 @@ struct NetRxPkt { bool isip6; bool isudp; bool istcp; + + size_t l3hdr_off; + size_t l4hdr_off; + size_t l5hdr_off; + + eth_ip6_hdr_info ip6hdr_info; + eth_ip4_hdr_info ip4hdr_info; + eth_l4_hdr_info l4hdr_info; }; void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) { struct NetRxPkt *p = g_malloc0(sizeof *p); p->has_virt_hdr = has_virt_hdr; + p->vec = NULL; + p->vec_len_total = 0; *pkt = p; } void net_rx_pkt_uninit(struct NetRxPkt *pkt) { + if (pkt->vec_len_total != 0) { + g_free(pkt->vec); + } + g_free(pkt); } @@ -66,33 +72,88 @@ struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) return &pkt->virt_hdr; } -void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan) +static inline void +net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, + int new_iov_len) +{ + if (pkt->vec_len_total < new_iov_len) { + g_free(pkt->vec); + pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); + pkt->vec_len_total = new_iov_len; + } +} + +static void +net_rx_pkt_pull_data(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t ploff) +{ + if (pkt->vlan_stripped) { + net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); + + pkt->vec[0].iov_base = pkt->ehdr_buf; + pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf); + + pkt->tot_len = + iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header); + + pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, + iov, iovcnt, ploff, pkt->tot_len); + } else { + net_rx_pkt_iovec_realloc(pkt, iovcnt); + + pkt->tot_len = iov_size(iov, iovcnt) - ploff; + pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, + iov, iovcnt, ploff, pkt->tot_len); + } + + eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); + + trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, + pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); +} + +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan) { uint16_t tci = 0; - uint16_t ploff; + uint16_t ploff = iovoff; assert(pkt); pkt->vlan_stripped = false; if (strip_vlan) { - pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); + pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, + &ploff, &tci); } - if (pkt->vlan_stripped) { - pkt->vec[0].iov_base = pkt->ehdr_buf; - pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); - pkt->vec[1].iov_base = (uint8_t *) data + ploff; - pkt->vec[1].iov_len = len - ploff; - pkt->vec_len = 2; - pkt->tot_len = len - ploff + sizeof(struct eth_header); - } else { - pkt->vec[0].iov_base = (void *)data; - pkt->vec[0].iov_len = len; - pkt->vec_len = 1; - pkt->tot_len = len; + pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); +} + +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet) +{ + uint16_t tci = 0; + uint16_t ploff = iovoff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, + pkt->ehdr_buf, + &ploff, &tci); } pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); } void net_rx_pkt_dump(struct NetRxPkt *pkt) @@ -132,10 +193,17 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, size_t len) { + const struct iovec iov = { + .iov_base = (void *)data, + .iov_len = len + }; + assert(pkt); - eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, - &pkt->isudp, &pkt->istcp); + eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); } void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, @@ -150,6 +218,180 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, *istcp = pkt->istcp; } +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l3hdr_off; +} + +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l4hdr_off; +} + +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l5hdr_off; +} + +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) +{ + return &pkt->ip6hdr_info; +} + +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) +{ + return &pkt->ip4hdr_info; +} + +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) +{ + return &pkt->l4hdr_info; +} + +static inline void +_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, + void *ptr, size_t size) +{ + memcpy(&rss_input[*bytes_written], ptr, size); + trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); + *bytes_written += size; +} + +static inline void +_net_rx_rss_prepare_ip4(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_src, sizeof(uint32_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_dst, sizeof(uint32_t)); +} + +static inline void +_net_rx_rss_prepare_ip6(uint8_t *rss_input, + struct NetRxPkt *pkt, + bool ipv6ex, size_t *bytes_written) +{ + eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src + : &ip6info->ip6_hdr.ip6_src, + sizeof(struct in6_address)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst + : &ip6info->ip6_hdr.ip6_dst, + sizeof(struct in6_address)); +} + +static inline void +_net_rx_rss_prepare_tcp(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_sport, sizeof(uint16_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_dport, sizeof(uint16_t)); +} + +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key) +{ + uint8_t rss_input[36]; + size_t rss_length = 0; + uint32_t rss_hash = 0; + net_toeplitz_key key_data; + + switch (type) { + case NetPktRssIpV4: + assert(pkt->isip4); + trace_net_rx_pkt_rss_ip4(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV4Tcp: + assert(pkt->isip4); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip4_tcp(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6Tcp: + assert(pkt->isip6); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip6_tcp(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); + break; + case NetPktRssIpV6Ex: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6_ex(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + break; + default: + assert(false); + break; + } + + net_toeplitz_key_init(&key_data, key); + net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); + + trace_net_rx_pkt_rss_hash(rss_length, rss_hash); + + return rss_hash; +} + +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->isip4) { + return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); + } + + return 0; +} + +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; + } + + return false; +} + +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return pkt->l4hdr_info.has_tcp_data; + } + + return false; +} + struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) { assert(pkt); @@ -157,6 +399,13 @@ struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) return pkt->vec; } +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec_len; +} + void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, struct virtio_net_hdr *vhdr) { @@ -165,6 +414,14 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); } +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt) +{ + assert(pkt); + + iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); +} + bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) { assert(pkt); @@ -185,3 +442,159 @@ uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) return pkt->tci; } + +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint32_t cntr; + uint16_t csum; + uint32_t csl; + + trace_net_rx_pkt_l3_csum_validate_entry(); + + if (!pkt->isip4) { + trace_net_rx_pkt_l3_csum_validate_not_ip4(); + return false; + } + + csl = pkt->l4hdr_off - pkt->l3hdr_off; + + cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l3hdr_off, + csl, 0); + + csum = net_checksum_finish(cntr); + + *csum_valid = (csum == 0); + + trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, + cntr, csum, *csum_valid); + + return true; +} + +static uint16_t +_net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) +{ + uint32_t cntr; + uint16_t csum; + uint16_t csl; + uint32_t cso; + + trace_net_rx_pkt_l4_csum_calc_entry(); + + if (pkt->isip4) { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip4_udp(); + } else { + csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - + IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); + trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); + } + + cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, + csl, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } else { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip6_udp(); + } else { + struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; + size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; + size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + + csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - + ip6opts_len; + trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); + } + + cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, + pkt->ip6hdr_info.l4proto, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } + + cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l4hdr_off, csl, cso); + + csum = net_checksum_finish(cntr); + + trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); + + return csum; +} + +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint16_t csum; + + trace_net_rx_pkt_l4_csum_validate_entry(); + + if (!pkt->istcp && !pkt->isudp) { + trace_net_rx_pkt_l4_csum_validate_not_xxp(); + return false; + } + + if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { + trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); + return false; + } + + csum = _net_rx_pkt_calc_l4_csum(pkt); + + *csum_valid = ((csum == 0) || (csum == 0xFFFF)); + + trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); + + return true; +} + +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) +{ + uint16_t csum = 0; + uint32_t l4_cso; + + trace_net_rx_pkt_l4_csum_fix_entry(); + + if (pkt->istcp) { + l4_cso = offsetof(struct tcp_header, th_sum); + trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); + } else if (pkt->isudp) { + if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { + trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); + return false; + } + l4_cso = offsetof(struct udp_header, uh_sum); + trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); + } else { + trace_net_rx_pkt_l4_csum_fix_not_xxp(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); + return false; + } + + /* Set zero to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + /* Calculate L4 checksum */ + csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); + + /* Set calculated checksum to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); + + return true; +} diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h index 897330a157..7adf0fad51 100644 --- a/hw/net/net_rx_pkt.h +++ b/hw/net/net_rx_pkt.h @@ -77,6 +77,103 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, bool *isip4, bool *isip6, bool *isudp, bool *istcp); +/** +* fetches L3 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L4 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L5 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt); + +/** + * fetches IP6 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt); + +/** + * fetches IP4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt); + +/** + * fetches L4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt); + +typedef enum { + NetPktRssIpV4, + NetPktRssIpV4Tcp, + NetPktRssIpV6Tcp, + NetPktRssIpV6, + NetPktRssIpV6Ex +} NetRxPktRssType; + +/** +* calculates RSS hash for packet +* +* @pkt: packet +* @type: RSS hash type +* +* Return: Toeplitz RSS hash. +* +*/ +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key); + +/** +* fetches IP identification for the packet +* +* @pkt: packet +* +*/ +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt); + +/** +* check if given packet is a TCP ACK packet +* +* @pkt: packet +* +*/ +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt); + +/** +* check if given packet contains TCP data +* +* @pkt: packet +* +*/ +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt); + /** * returns virtio header stored in rx context * @@ -122,6 +219,37 @@ bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt); */ bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); +/** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* +*/ +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, + int iovcnt, size_t iovoff, + bool strip_vlan); + +/** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* @vet: VLAN tag Ethernet type +* +*/ +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet); + /** * attach data to rx packet * @@ -131,8 +259,17 @@ bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); * @strip_vlan: should the module strip vlan from data * */ -void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan); +static inline void +net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan) +{ + const struct iovec iov = { + .iov_base = (void *) data, + .iov_len = len + }; + + net_rx_pkt_attach_iovec(pkt, &iov, 1, 0, strip_vlan); +} /** * returns io vector that holds the attached data @@ -143,6 +280,15 @@ void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, */ struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt); +/** +* returns io vector length that holds the attached data +* +* @pkt: packet +* @ret: IOVec length +* +*/ +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt); + /** * prints rx packet data if debug is enabled * @@ -161,6 +307,17 @@ void net_rx_pkt_dump(struct NetRxPkt *pkt); void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, struct virtio_net_hdr *vhdr); +/** +* copy passed vhdr data to packet context +* +* @pkt: packet +* @iov: VHDR iov +* @iovcnt: VHDR iov array size +* +*/ +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt); + /** * save packet type in packet context * @@ -171,4 +328,36 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, eth_pkt_types_e packet_type); +/** +* validate TCP/UDP checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* validate IPv4 checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* fix IPv4 checksum of the packet +* +* @pkt: packet +* @ret: true if checksum was fixed, false in case packet is +* not TCP/UDP or checksum correction is not possible +* +*/ +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt); + #endif diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index 94c7e3d3f6..a64f51cbef 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -15,12 +15,8 @@ * */ -#include "qemu/osdep.h" -#include "hw/hw.h" #include "net_tx_pkt.h" #include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" #include "net/net.h" @@ -44,6 +40,7 @@ struct NetTxPkt { struct iovec *vec; uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN]; uint32_t payload_len; @@ -53,6 +50,8 @@ struct NetTxPkt { uint16_t hdr_len; eth_pkt_types_e packet_type; uint8_t l4proto; + + bool is_loopback; }; void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, @@ -72,8 +71,7 @@ void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = p->has_virt_hdr ? sizeof p->virt_hdr : 0; p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; - p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - p->vec[NET_TX_PKT_L3HDR_FRAG].iov_len = 0; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr; *pkt = p; } @@ -87,38 +85,52 @@ void net_tx_pkt_uninit(struct NetTxPkt *pkt) } } -void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt) { uint16_t csum; - uint32_t ph_raw_csum; assert(pkt); - uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; struct ip_header *ip_hdr; - - if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && - VIRTIO_NET_HDR_GSO_UDP != gso_type) { - return; - } - ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > - ETH_MAX_IP_DGRAM_LEN) { - return; - } - ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); - /* Calculate IP header checksum */ ip_hdr->ip_sum = 0; csum = net_raw_checksum((uint8_t *)ip_hdr, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); ip_hdr->ip_sum = cpu_to_be16(csum); +} + +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +{ + uint16_t csum; + uint32_t cntr, cso; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + + if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > + ETH_MAX_IP_DGRAM_LEN) { + return; + } + + if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || + gso_type == VIRTIO_NET_HDR_GSO_UDP) { + /* Calculate IP header checksum */ + net_tx_pkt_update_ip_hdr_checksum(pkt); + + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len, + IP_PROTO_TCP, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else { + return; + } - /* Calculate IP pseudo header checksum */ - ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); - csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); } @@ -160,15 +172,19 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) if (bytes_read < l2_hdr->iov_len) { l2_hdr->iov_len = 0; + l3_hdr->iov_len = 0; + pkt->packet_type = ETH_PKT_UCAST; return false; + } else { + l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN; + l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); } - l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len); switch (l3_proto) { case ETH_P_IP: - l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, sizeof(struct ip_header)); @@ -178,27 +194,45 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) } l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); - pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; - /* copy optional IPv4 header data */ - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, - l2_hdr->iov_len + sizeof(struct ip_header), - l3_hdr->iov_base + sizeof(struct ip_header), - l3_hdr->iov_len - sizeof(struct ip_header)); - if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + if (l3_hdr->iov_len < sizeof(struct ip_header)) { l3_hdr->iov_len = 0; return false; } + + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) { + /* copy optional IPv4 header data if any*/ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + } + break; case ETH_P_IPV6: + { + eth_ip6_hdr_info hdrinfo; + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - &pkt->l4proto, &full_ip6hdr_len)) { + &hdrinfo)) { l3_hdr->iov_len = 0; return false; } - l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + pkt->l4proto = hdrinfo.l4proto; + full_ip6hdr_len = hdrinfo.full_hdr_len; + + if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) { + l3_hdr->iov_len = 0; + return false; + } bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, full_ip6hdr_len); @@ -210,40 +244,35 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) l3_hdr->iov_len = full_ip6hdr_len; } break; - + } default: l3_hdr->iov_len = 0; break; } net_tx_pkt_calculate_hdr_len(pkt); - pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); return true; } -static bool net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) +static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) { - size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; - + pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->max_payload_frags, pkt->raw, pkt->raw_frags, - pkt->hdr_len, payload_len); + pkt->hdr_len, pkt->payload_len); +} - if (pkt->payload_frags != (uint32_t) -1) { - pkt->payload_len = payload_len; +bool net_tx_pkt_parse(struct NetTxPkt *pkt) +{ + if (net_tx_pkt_parse_headers(pkt)) { + net_tx_pkt_rebuild_payload(pkt); return true; } else { return false; } } -bool net_tx_pkt_parse(struct NetTxPkt *pkt) -{ - return net_tx_pkt_parse_headers(pkt) && - net_tx_pkt_rebuild_payload(pkt); -} - struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) { assert(pkt); @@ -256,7 +285,7 @@ static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; uint16_t l3_proto; - l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1, pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); if (!tso_enable) { @@ -288,7 +317,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, break; case VIRTIO_NET_HDR_GSO_UDP: - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); break; @@ -297,7 +326,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr)); pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; break; default: @@ -322,13 +351,14 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, } } -void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype) { bool is_new; assert(pkt); - eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, - vlan, &is_new); + eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, vlan_ethtype, &is_new); /* update l2hdrlen */ if (is_new) { @@ -354,14 +384,19 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, mapped_len = len; ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); - ventry->iov_len = mapped_len; - pkt->raw_frags += !!ventry->iov_base; - if ((ventry->iov_base == NULL) || (len != mapped_len)) { + if ((ventry->iov_base != NULL) && (len == mapped_len)) { + ventry->iov_len = mapped_len; + pkt->raw_frags++; + return true; + } else { return false; } +} - return true; +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt) +{ + return pkt->raw_frags > 0; } eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) @@ -401,14 +436,8 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); - g_free(pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base); - pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - assert(pkt->vec); - for (i = NET_TX_PKT_L2HDR_FRAG; - i < pkt->payload_frags + NET_TX_PKT_PL_START_FRAG; i++) { - pkt->vec[i].iov_len = 0; - } + pkt->payload_len = 0; pkt->payload_frags = 0; @@ -417,12 +446,10 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) assert(pkt->raw[i].iov_base); cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, false, pkt->raw[i].iov_len); - pkt->raw[i].iov_len = 0; } pkt->raw_frags = 0; pkt->hdr_len = 0; - pkt->packet_type = 0; pkt->l4proto = 0; } @@ -431,6 +458,7 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; uint32_t csum_cntr; uint16_t csum = 0; + uint32_t cso; /* num of iovec without vhdr */ uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; uint16_t csl; @@ -443,12 +471,13 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) /* Calculate L4 TCP/UDP checksum */ csl = pkt->payload_len; - /* data checksum */ - csum_cntr = - net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); /* add pseudo header to csum */ iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso); + + /* data checksum */ + csum_cntr += + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso); /* Put the checksum obtained into the packet */ csum = cpu_to_be16(net_checksum_finish(csum_cntr)); @@ -471,7 +500,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; - while (fetched < pkt->virt_hdr.gso_size) { + while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) { /* no more place in fragment iov */ if (*dst_idx == NET_MAX_FRAG_SG_LIST) { @@ -486,7 +515,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, - pkt->virt_hdr.gso_size - fetched); + IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size) - fetched); *src_offset += dst[*dst_idx].iov_len; fetched += dst[*dst_idx].iov_len; @@ -502,6 +531,16 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, return fetched; } +static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt, + NetClientState *nc, const struct iovec *iov, int iov_cnt) +{ + if (pkt->is_loopback) { + nc->info->receive_iov(nc, iov, iov_cnt); + } else { + qemu_sendv_packet(nc, iov, iov_cnt); + } +} + static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, NetClientState *nc) { @@ -540,7 +579,7 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); - qemu_sendv_packet(nc, fragment, dst_idx); + net_tx_pkt_sendv(pkt, nc, fragment, dst_idx); fragment_offset += fragment_len; @@ -572,10 +611,21 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) if (pkt->has_virt_hdr || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { - qemu_sendv_packet(nc, pkt->vec, + net_tx_pkt_sendv(pkt, nc, pkt->vec, pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); return true; } return net_tx_pkt_do_sw_fragmentation(pkt, nc); } + +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc) +{ + bool res; + + pkt->is_loopback = true; + res = net_tx_pkt_send(pkt, nc); + pkt->is_loopback = false; + + return res; +} diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h index be2e117b7d..e49772d238 100644 --- a/hw/net/net_tx_pkt.h +++ b/hw/net/net_tx_pkt.h @@ -18,6 +18,7 @@ #ifndef NET_TX_PKT_H #define NET_TX_PKT_H +#include "qemu/osdep.h" #include "net/eth.h" #include "exec/hwaddr.h" @@ -64,13 +65,29 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, bool csum_enable, uint32_t gso_size); /** - * updates vlan tag, and adds vlan header in case it is missing - * - * @pkt: packet - * @vlan: VLAN tag - * - */ -void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan); +* updates vlan tag, and adds vlan header with custom ethernet type +* in case it is missing. +* +* @pkt: packet +* @vlan: VLAN tag +* @vlan_ethtype: VLAN header Ethernet type +* +*/ +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype); + +/** +* updates vlan tag, and adds vlan header in case it is missing +* +* @pkt: packet +* @vlan: VLAN tag +* +*/ +static inline void +net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +{ + net_tx_pkt_setup_vlan_header_ex(pkt, vlan, ETH_P_VLAN); +} /** * populate data fragment into pkt context. @@ -84,13 +101,21 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, size_t len); /** - * fix ip header fields and calculate checksums needed. + * Fix ip header fields and calculate IP header and pseudo header checksums. * * @pkt: packet * */ void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt); +/** + * Calculate the IP header checksum. + * + * @pkt: packet + * + */ +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt); + /** * get length of all populated data. * @@ -135,6 +160,17 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt); */ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); +/** +* Redirect packet directly to receive path (emulate loopback phy). +* Handles sw offloads if vhdr is not supported. +* +* @pkt: packet +* @nc: NetClientState +* @ret: operation result +* +*/ +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc); + /** * parse raw packet data and analyze offload requirements. * @@ -143,4 +179,12 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); */ bool net_tx_pkt_parse(struct NetTxPkt *pkt); +/** +* indicates if there are data fragments held by this packet object. +* +* @pkt: packet +* +*/ +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt); + #endif diff --git a/include/net/checksum.h b/include/net/checksum.h index dd8b4f6dc2..7df472c2fe 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -46,10 +46,12 @@ net_raw_checksum(uint8_t *data, int length) * @iov_cnt: number of array elements * @iov_off: starting iov offset for checksumming * @size: length of data to be checksummed + * @csum_offset: offset of the checksum chunk */ uint32_t net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, - uint32_t iov_off, uint32_t size); + uint32_t iov_off, uint32_t size, + uint32_t csum_offset); typedef struct toeplitz_key_st { uint32_t leftmost_32_bits; diff --git a/include/net/eth.h b/include/net/eth.h index 5a32259938..2013175857 100644 --- a/include/net/eth.h +++ b/include/net/eth.h @@ -72,6 +72,8 @@ typedef struct tcp_header { #define TCP_HEADER_FLAGS(tcp) \ TCP_FLAGS_ONLY(be16_to_cpu((tcp)->th_offset_flags)) +#define TCP_FLAG_ACK 0x10 + #define TCP_HEADER_DATA_OFFSET(tcp) \ (((be16_to_cpu((tcp)->th_offset_flags) >> 12) & 0xf) << 2) @@ -116,11 +118,34 @@ struct ip6_header { struct in6_address ip6_dst; /* destination address */ }; +typedef struct ip6_pseudo_header { + struct in6_address ip6_src; + struct in6_address ip6_dst; + uint32_t len; + uint8_t zero[3]; + uint8_t next_hdr; +} ip6_pseudo_header; + struct ip6_ext_hdr { uint8_t ip6r_nxt; /* next header */ uint8_t ip6r_len; /* length in units of 8 octets */ }; +struct ip6_ext_hdr_routing { + uint8_t nxt; + uint8_t len; + uint8_t rtype; + uint8_t segleft; + uint8_t rsvd[4]; +}; + +struct ip6_option_hdr { +#define IP6_OPT_PAD1 (0x00) +#define IP6_OPT_HOME (0xC9) + uint8_t type; + uint8_t len; +}; + struct udp_hdr { uint16_t uh_sport; /* source port */ uint16_t uh_dport; /* destination port */ @@ -169,19 +194,22 @@ struct tcp_hdr { #define PKT_GET_IP_HDR(p) \ ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p))) #define IP_HDR_GET_LEN(p) \ - ((((struct ip_header *)p)->ip_ver_len & 0x0F) << 2) + ((((struct ip_header *)(p))->ip_ver_len & 0x0F) << 2) #define PKT_GET_IP_HDR_LEN(p) \ (IP_HDR_GET_LEN(PKT_GET_IP_HDR(p))) #define PKT_GET_IP6_HDR(p) \ ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p))) #define IP_HEADER_VERSION(ip) \ - ((ip->ip_ver_len >> 4)&0xf) + (((ip)->ip_ver_len >> 4) & 0xf) +#define IP4_IS_FRAGMENT(ip) \ + ((be16_to_cpu((ip)->ip_off) & (IP_OFFMASK | IP_MF)) != 0) #define ETH_P_IP (0x0800) /* Internet Protocol packet */ #define ETH_P_ARP (0x0806) /* Address Resolution packet */ #define ETH_P_IPV6 (0x86dd) #define ETH_P_VLAN (0x8100) #define ETH_P_DVLAN (0x88a8) +#define ETH_P_UNKNOWN (0xffff) #define VLAN_VID_MASK 0x0fff #define IP_HEADER_VERSION_4 (4) #define IP_HEADER_VERSION_6 (6) @@ -266,7 +294,7 @@ eth_get_l2_hdr_length(const void *p) case ETH_P_VLAN: return sizeof(struct eth_header) + sizeof(struct vlan_header); case ETH_P_DVLAN: - if (hvlan->h_proto == ETH_P_VLAN) { + if (be16_to_cpu(hvlan->h_proto) == ETH_P_VLAN) { return sizeof(struct eth_header) + 2 * sizeof(struct vlan_header); } else { return sizeof(struct eth_header) + sizeof(struct vlan_header); @@ -276,6 +304,19 @@ eth_get_l2_hdr_length(const void *p) } } +static inline uint32_t +eth_get_l2_hdr_length_iov(const struct iovec *iov, int iovcnt) +{ + uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)]; + size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p)); + + if (copied < ARRAY_SIZE(p)) { + return copied; + } + + return eth_get_l2_hdr_length(p); +} + static inline uint16_t eth_get_pkt_tci(const void *p) { @@ -290,51 +331,67 @@ eth_get_pkt_tci(const void *p) } } -static inline bool -eth_strip_vlan(const void *p, uint8_t *new_ehdr_buf, - uint16_t *payload_offset, uint16_t *tci) -{ - uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto); - struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p); - struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; +bool +eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, + uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci); - switch (proto) { - case ETH_P_VLAN: - case ETH_P_DVLAN: - memcpy(new_ehdr->h_source, PKT_GET_ETH_HDR(p)->h_source, ETH_ALEN); - memcpy(new_ehdr->h_dest, PKT_GET_ETH_HDR(p)->h_dest, ETH_ALEN); - new_ehdr->h_proto = hvlan->h_proto; - *tci = be16_to_cpu(hvlan->h_tci); - *payload_offset = - sizeof(struct eth_header) + sizeof(struct vlan_header); - if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { - memcpy(PKT_GET_VLAN_HDR(new_ehdr), - PKT_GET_DVLAN_HDR(p), - sizeof(struct vlan_header)); - *payload_offset += sizeof(struct vlan_header); - } - return true; - default: - return false; - } -} +bool +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, + uint16_t vet, uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci); -static inline uint16_t -eth_get_l3_proto(const void *l2hdr, size_t l2hdr_len) +uint16_t +eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len); + +void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, + uint16_t vlan_ethtype, bool *is_new); + +static inline void +eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, + bool *is_new) { - uint8_t *proto_ptr = (uint8_t *) l2hdr + l2hdr_len - sizeof(uint16_t); - return be16_to_cpup((uint16_t *)proto_ptr); + eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new); } -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, - bool *is_new); uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto); -void eth_get_protocols(const uint8_t *headers, - uint32_t hdr_length, +typedef struct eth_ip6_hdr_info_st { + uint8_t l4proto; + size_t full_hdr_len; + struct ip6_header ip6_hdr; + bool has_ext_hdrs; + bool rss_ex_src_valid; + struct in6_address rss_ex_src; + bool rss_ex_dst_valid; + struct in6_address rss_ex_dst; + bool fragment; +} eth_ip6_hdr_info; + +typedef struct eth_ip4_hdr_info_st { + struct ip_header ip4_hdr; + bool fragment; +} eth_ip4_hdr_info; + +typedef struct eth_l4_hdr_info_st { + union { + struct tcp_header tcp; + struct udp_header udp; + } hdr; + + bool has_tcp_data; +} eth_l4_hdr_info; + +void eth_get_protocols(const struct iovec *iov, int iovcnt, bool *isip4, bool *isip6, - bool *isudp, bool *istcp); + bool *isudp, bool *istcp, + size_t *l3hdr_off, + size_t *l4hdr_off, + size_t *l5hdr_off, + eth_ip6_hdr_info *ip6hdr_info, + eth_ip4_hdr_info *ip4hdr_info, + eth_l4_hdr_info *l4hdr_info); void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, void *l3hdr, size_t l3hdr_len, @@ -345,11 +402,18 @@ void eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len); uint32_t -eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl); +eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, + uint16_t csl, + uint32_t *cso); + +uint32_t +eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, + uint16_t csl, + uint8_t l4_proto, + uint32_t *cso); bool -eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags, - size_t ip6hdr_off, uint8_t *l4proto, - size_t *full_hdr_len); +eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, eth_ip6_hdr_info *info); #endif diff --git a/net/checksum.c b/net/checksum.c index d0fa424cc1..196aaa3459 100644 --- a/net/checksum.c +++ b/net/checksum.c @@ -95,12 +95,11 @@ void net_checksum_calculate(uint8_t *data, int length) uint32_t net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, - uint32_t iov_off, uint32_t size) + uint32_t iov_off, uint32_t size, uint32_t csum_offset) { size_t iovec_off, buf_off; unsigned int i; uint32_t res = 0; - uint32_t seq = 0; iovec_off = 0; buf_off = 0; @@ -109,8 +108,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off); - res += net_checksum_add_cont(len, chunk_buf, seq); - seq += len; + res += net_checksum_add_cont(len, chunk_buf, csum_offset); + csum_offset += len; buf_off += len; iov_off += len; diff --git a/net/eth.c b/net/eth.c index 7e32d274c7..95fe15c23f 100644 --- a/net/eth.c +++ b/net/eth.c @@ -21,8 +21,8 @@ #include "qemu-common.h" #include "net/tap.h" -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, - bool *is_new) +void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, + uint16_t vlan_ethtype, bool *is_new) { struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); @@ -36,7 +36,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, default: /* No VLAN header, put a new one */ vhdr->h_proto = ehdr->h_proto; - ehdr->h_proto = cpu_to_be16(ETH_P_VLAN); + ehdr->h_proto = cpu_to_be16(vlan_ethtype); *is_new = true; break; } @@ -79,26 +79,100 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) return VIRTIO_NET_HDR_GSO_NONE | ecn_state; } -void eth_get_protocols(const uint8_t *headers, - uint32_t hdr_length, +uint16_t +eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len) +{ + uint16_t proto; + size_t copied; + size_t size = iov_size(l2hdr_iov, iovcnt); + size_t proto_offset = l2hdr_len - sizeof(proto); + + if (size < proto_offset) { + return ETH_P_UNKNOWN; + } + + copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset, + &proto, sizeof(proto)); + + return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN; +} + +static bool +_eth_copy_chunk(size_t input_size, + const struct iovec *iov, int iovcnt, + size_t offset, size_t length, + void *buffer) +{ + size_t copied; + + if (input_size < offset) { + return false; + } + + copied = iov_to_buf(iov, iovcnt, offset, buffer, length); + + if (copied < length) { + return false; + } + + return true; +} + +static bool +_eth_tcp_has_data(bool is_ip4, + const struct ip_header *ip4_hdr, + const struct ip6_header *ip6_hdr, + size_t full_ip6hdr_len, + const struct tcp_header *tcp) +{ + uint32_t l4len; + + if (is_ip4) { + l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr); + } else { + size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len; + } + + return l4len > TCP_HEADER_DATA_OFFSET(tcp); +} + +void eth_get_protocols(const struct iovec *iov, int iovcnt, bool *isip4, bool *isip6, - bool *isudp, bool *istcp) + bool *isudp, bool *istcp, + size_t *l3hdr_off, + size_t *l4hdr_off, + size_t *l5hdr_off, + eth_ip6_hdr_info *ip6hdr_info, + eth_ip4_hdr_info *ip4hdr_info, + eth_l4_hdr_info *l4hdr_info) { int proto; - size_t l2hdr_len = eth_get_l2_hdr_length(headers); - assert(hdr_length >= eth_get_l2_hdr_length(headers)); + bool fragment = false; + size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); + size_t input_size = iov_size(iov, iovcnt); + size_t copied; + *isip4 = *isip6 = *isudp = *istcp = false; - proto = eth_get_l3_proto(headers, l2hdr_len); + proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); + + *l3hdr_off = l2hdr_len; + if (proto == ETH_P_IP) { - *isip4 = true; + struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; - struct ip_header *iphdr; + if (input_size < l2hdr_len) { + return; + } + + copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); - assert(hdr_length >= - eth_get_l2_hdr_length(headers) + sizeof(struct ip_header)); + *isip4 = true; - iphdr = PKT_GET_IP_HDR(headers); + if (copied < sizeof(*iphdr)) { + return; + } if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { if (iphdr->ip_p == IP_PROTO_TCP) { @@ -107,24 +181,135 @@ void eth_get_protocols(const uint8_t *headers, *isudp = true; } } - } else if (proto == ETH_P_IPV6) { - uint8_t l4proto; - size_t full_ip6hdr_len; - struct iovec hdr_vec; - hdr_vec.iov_base = (void *) headers; - hdr_vec.iov_len = hdr_length; + ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); + *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); + + fragment = ip4hdr_info->fragment; + } else if (proto == ETH_P_IPV6) { *isip6 = true; - if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len, - &l4proto, &full_ip6hdr_len)) { - if (l4proto == IP_PROTO_TCP) { + if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, + ip6hdr_info)) { + if (ip6hdr_info->l4proto == IP_PROTO_TCP) { *istcp = true; - } else if (l4proto == IP_PROTO_UDP) { + } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) { *isudp = true; } + } else { + return; + } + + *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; + fragment = ip6hdr_info->fragment; + } + + if (!fragment) { + if (*istcp) { + *istcp = _eth_copy_chunk(input_size, + iov, iovcnt, + *l4hdr_off, sizeof(l4hdr_info->hdr.tcp), + &l4hdr_info->hdr.tcp); + + if (istcp) { + *l5hdr_off = *l4hdr_off + + TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp); + + l4hdr_info->has_tcp_data = + _eth_tcp_has_data(proto == ETH_P_IP, + &ip4hdr_info->ip4_hdr, + &ip6hdr_info->ip6_hdr, + *l4hdr_off - *l3hdr_off, + &l4hdr_info->hdr.tcp); + } + } else if (*isudp) { + *isudp = _eth_copy_chunk(input_size, + iov, iovcnt, + *l4hdr_off, sizeof(l4hdr_info->hdr.udp), + &l4hdr_info->hdr.udp); + *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); + } + } +} + +bool +eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, + uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci) +{ + struct vlan_header vlan_hdr; + struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; + + size_t copied = iov_to_buf(iov, iovcnt, iovoff, + new_ehdr, sizeof(*new_ehdr)); + + if (copied < sizeof(*new_ehdr)) { + return false; + } + + switch (be16_to_cpu(new_ehdr->h_proto)) { + case ETH_P_VLAN: + case ETH_P_DVLAN: + copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), + &vlan_hdr, sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; + } + + new_ehdr->h_proto = vlan_hdr.h_proto; + + *tci = be16_to_cpu(vlan_hdr.h_tci); + *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); + + if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { + + copied = iov_to_buf(iov, iovcnt, *payload_offset, + PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; + } + + *payload_offset += sizeof(vlan_hdr); + } + return true; + default: + return false; + } +} + +bool +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, + uint16_t vet, uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci) +{ + struct vlan_header vlan_hdr; + struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; + + size_t copied = iov_to_buf(iov, iovcnt, iovoff, + new_ehdr, sizeof(*new_ehdr)); + + if (copied < sizeof(*new_ehdr)) { + return false; + } + + if (be16_to_cpu(new_ehdr->h_proto) == vet) { + copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), + &vlan_hdr, sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; } + + new_ehdr->h_proto = vlan_hdr.h_proto; + + *tci = be16_to_cpu(vlan_hdr.h_tci); + *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); + return true; } + + return false; } void @@ -133,7 +318,12 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, size_t l3payload_len, size_t frag_offset, bool more_frags) { - if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) { + const struct iovec l2vec = { + .iov_base = (void *) l2hdr, + .iov_len = l2hdr_len + }; + + if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) { uint16_t orig_flags; struct ip_header *iphdr = (struct ip_header *) l3hdr; uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; @@ -158,7 +348,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) } uint32_t -eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) +eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, + uint16_t csl, + uint32_t *cso) { struct ip_pseudo_header ipph; ipph.ip_src = iphdr->ip_src; @@ -166,7 +358,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) ipph.ip_payload = cpu_to_be16(csl); ipph.ip_proto = iphdr->ip_p; ipph.zeros = 0; - return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph); + *cso = sizeof(ipph); + return net_checksum_add(*cso, (uint8_t *) &ipph); +} + +uint32_t +eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, + uint16_t csl, + uint8_t l4_proto, + uint32_t *cso) +{ + struct ip6_pseudo_header ipph; + ipph.ip6_src = iphdr->ip6_src; + ipph.ip6_dst = iphdr->ip6_dst; + ipph.len = cpu_to_be16(csl); + ipph.zero[0] = 0; + ipph.zero[1] = 0; + ipph.zero[2] = 0; + ipph.next_hdr = l4_proto; + *cso = sizeof(ipph); + return net_checksum_add(*cso, (uint8_t *)&ipph); } static bool @@ -186,33 +397,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type) } } -bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags, - size_t ip6hdr_off, uint8_t *l4proto, - size_t *full_hdr_len) +static bool +_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + size_t rthdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *dst_addr) +{ + struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; + + if ((rthdr->rtype == 2) && + (rthdr->len == sizeof(struct in6_address) / 8) && + (rthdr->segleft == 1)) { + + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read; + + if (input_size < rthdr_offset + sizeof(*ext_hdr)) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, + rthdr_offset + sizeof(*ext_hdr), + dst_addr, sizeof(dst_addr)); + + return bytes_read == sizeof(dst_addr); + } + + return false; +} + +static bool +_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags, + size_t dsthdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *src_addr) +{ + size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr); + struct ip6_option_hdr opthdr; + size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr); + + while (bytes_left > sizeof(opthdr)) { + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read, optlen; + + if (input_size < opt_offset) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset, + &opthdr, sizeof(opthdr)); + + if (bytes_read != sizeof(opthdr)) { + return false; + } + + optlen = (opthdr.type == IP6_OPT_PAD1) ? 1 + : (opthdr.len + sizeof(opthdr)); + + if (optlen > bytes_left) { + return false; + } + + if (opthdr.type == IP6_OPT_HOME) { + size_t input_size = iov_size(pkt, pkt_frags); + + if (input_size < opt_offset + sizeof(opthdr)) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, + opt_offset + sizeof(opthdr), + src_addr, sizeof(src_addr)); + + return bytes_read == sizeof(src_addr); + } + + opt_offset += optlen; + bytes_left -= optlen; + } + + return false; +} + +bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, eth_ip6_hdr_info *info) { - struct ip6_header ip6_hdr; struct ip6_ext_hdr ext_hdr; size_t bytes_read; + uint8_t curr_ext_hdr_type; + size_t input_size = iov_size(pkt, pkt_frags); + + info->rss_ex_dst_valid = false; + info->rss_ex_src_valid = false; + info->fragment = false; + + if (input_size < ip6hdr_off) { + return false; + } bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, - &ip6_hdr, sizeof(ip6_hdr)); - if (bytes_read < sizeof(ip6_hdr)) { + &info->ip6_hdr, sizeof(info->ip6_hdr)); + if (bytes_read < sizeof(info->ip6_hdr)) { return false; } - *full_hdr_len = sizeof(struct ip6_header); + info->full_hdr_len = sizeof(struct ip6_header); + + curr_ext_hdr_type = info->ip6_hdr.ip6_nxt; - if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) { - *l4proto = ip6_hdr.ip6_nxt; + if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) { + info->l4proto = info->ip6_hdr.ip6_nxt; + info->has_ext_hdrs = false; return true; } + info->has_ext_hdrs = true; + do { - bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len, + if (input_size < ip6hdr_off + info->full_hdr_len) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len, &ext_hdr, sizeof(ext_hdr)); - *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; - } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt)); - *l4proto = ext_hdr.ip6r_nxt; + if (bytes_read < sizeof(ext_hdr)) { + return false; + } + + if (curr_ext_hdr_type == IP6_ROUTING) { + info->rss_ex_dst_valid = + _eth_get_rss_ex_dst_addr(pkt, pkt_frags, + ip6hdr_off + info->full_hdr_len, + &ext_hdr, &info->rss_ex_dst); + } else if (curr_ext_hdr_type == IP6_DESTINATON) { + info->rss_ex_src_valid = + _eth_get_rss_ex_src_addr(pkt, pkt_frags, + ip6hdr_off + info->full_hdr_len, + &ext_hdr, &info->rss_ex_src); + } else if (curr_ext_hdr_type == IP6_FRAGMENT) { + info->fragment = true; + } + + info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; + curr_ext_hdr_type = ext_hdr.ip6r_nxt; + } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type)); + + info->l4proto = ext_hdr.ip6r_nxt; return true; } diff --git a/trace-events b/trace-events index b53c3541a3..03cb7703ba 100644 --- a/trace-events +++ b/trace-events @@ -1914,3 +1914,43 @@ gic_set_irq(int irq, int level, int cpumask, int target) "irq %d level %d cpumas gic_update_bestirq(int cpu, int irq, int prio, int priority_mask, int running_priority) "cpu %d irq %d priority %d cpu priority mask %d cpu running priority %d" gic_update_set_irq(int cpu, const char *name, int level) "cpu[%d]: %s = %d" gic_acknowledge_irq(int cpu, int irq) "cpu %d acknowledged irq %d" + +# hw/net/net_rx_pkt.c +net_rx_pkt_parsed(bool ip4, bool ip6, bool udp, bool tcp, size_t l3o, size_t l4o, size_t l5o) "RX packet parsed: ip4: %d, ip6: %d, udp: %d, tcp: %d, l3 offset: %zu, l4 offset: %zu, l5 offset: %zu" +net_rx_pkt_l4_csum_validate_entry(void) "Starting L4 checksum validation" +net_rx_pkt_l4_csum_validate_not_xxp(void) "Not a TCP/UDP packet" +net_rx_pkt_l4_csum_validate_udp_with_no_checksum(void) "UDP packet without checksum" +net_rx_pkt_l4_csum_validate_ip4_fragment(void) "IP4 fragment" +net_rx_pkt_l4_csum_validate_ip4_udp(void) "IP4/UDP packet" +net_rx_pkt_l4_csum_validate_ip4_tcp(void) "IP4/TCP packet" +net_rx_pkt_l4_csum_validate_ip6_udp(void) "IP6/UDP packet" +net_rx_pkt_l4_csum_validate_ip6_tcp(void) "IP6/TCP packet" +net_rx_pkt_l4_csum_validate_csum(bool csum_valid) "Checksum valid: %d" + +net_rx_pkt_l4_csum_calc_entry(void) "Starting L4 checksum calculation" +net_rx_pkt_l4_csum_calc_ip4_udp(void) "IP4/UDP packet" +net_rx_pkt_l4_csum_calc_ip4_tcp(void) "IP4/TCP packet" +net_rx_pkt_l4_csum_calc_ip6_udp(void) "IP6/UDP packet" +net_rx_pkt_l4_csum_calc_ip6_tcp(void) "IP6/TCP packet" +net_rx_pkt_l4_csum_calc_ph_csum(uint32_t cntr, uint16_t csl) "Pseudo-header: checksum counter %u, length %u" +net_rx_pkt_l4_csum_calc_csum(size_t l4hdr_off, uint16_t csl, uint32_t cntr, uint16_t csum) "L4 Checksum: L4 header offset: %zu, length: %u, counter: 0x%X, final checksum: 0x%X" + +net_rx_pkt_l4_csum_fix_entry(void) "Starting L4 checksum correction" +net_rx_pkt_l4_csum_fix_tcp(uint32_t l4_cso) "TCP packet, L4 cso: %u" +net_rx_pkt_l4_csum_fix_udp(uint32_t l4_cso) "UDP packet, L4 cso: %u" +net_rx_pkt_l4_csum_fix_not_xxp(void) "Not an IP4 packet" +net_rx_pkt_l4_csum_fix_ip4_fragment(void) "IP4 fragment" +net_rx_pkt_l4_csum_fix_udp_with_no_checksum(void) "UDP packet without checksum" +net_rx_pkt_l4_csum_fix_csum(uint32_t cso, uint16_t csum) "L4 Checksum: Offset: %u, value 0x%X" + +net_rx_pkt_l3_csum_validate_entry(void) "Starting L3 checksum validation" +net_rx_pkt_l3_csum_validate_not_ip4(void) "Not an IP4 packet" +net_rx_pkt_l3_csum_validate_csum(size_t l3hdr_off, uint32_t csl, uint32_t cntr, uint16_t csum, bool csum_valid) "L3 Checksum: L3 header offset: %zu, length: %u, counter: 0x%X, final checksum: 0x%X, valid: %d" + +net_rx_pkt_rss_ip4(void) "Calculating IPv4 RSS hash" +net_rx_pkt_rss_ip4_tcp(void) "Calculating IPv4/TCP RSS hash" +net_rx_pkt_rss_ip6_tcp(void) "Calculating IPv6/TCP RSS hash" +net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash" +net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash" +net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X" +net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes" -- cgit 1.4.1 From 111710107dc6041de871af437214d32d1db088d1 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:42 +0300 Subject: vmxnet3: Use pci_dma_* API instead of cpu_physical_memory_* To make this device and network packets abstractions ready for IOMMU. Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/net_tx_pkt.c | 16 +++++++++++----- hw/net/net_tx_pkt.h | 5 +++-- hw/net/vmxnet3.c | 51 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 44 insertions(+), 28 deletions(-) (limited to 'hw/net/net_tx_pkt.c') diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index a64f51cbef..e4478bead8 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -20,6 +20,7 @@ #include "net/checksum.h" #include "net/tap.h" #include "net/net.h" +#include "hw/pci/pci.h" enum { NET_TX_PKT_VHDR_FRAG = 0, @@ -30,6 +31,8 @@ enum { /* TX packet private context */ struct NetTxPkt { + PCIDevice *pci_dev; + struct virtio_net_hdr virt_hdr; bool has_virt_hdr; @@ -54,11 +57,13 @@ struct NetTxPkt { bool is_loopback; }; -void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr) +void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, + uint32_t max_frags, bool has_virt_hdr) { struct NetTxPkt *p = g_malloc0(sizeof *p); + p->pci_dev = pci_dev; + p->vec = g_malloc((sizeof *p->vec) * (max_frags + NET_TX_PKT_PL_START_FRAG)); @@ -383,7 +388,8 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, ventry = &pkt->raw[pkt->raw_frags]; mapped_len = len; - ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); + ventry->iov_base = pci_dma_map(pkt->pci_dev, pa, + &mapped_len, DMA_DIRECTION_TO_DEVICE); if ((ventry->iov_base != NULL) && (len == mapped_len)) { ventry->iov_len = mapped_len; @@ -444,8 +450,8 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) assert(pkt->raw); for (i = 0; i < pkt->raw_frags; i++) { assert(pkt->raw[i].iov_base); - cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, - false, pkt->raw[i].iov_len); + pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, pkt->raw[i].iov_len, + DMA_DIRECTION_TO_DEVICE, 0); } pkt->raw_frags = 0; diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h index e49772d238..07b9a2098b 100644 --- a/hw/net/net_tx_pkt.h +++ b/hw/net/net_tx_pkt.h @@ -31,11 +31,12 @@ struct NetTxPkt; * Init function for tx packet functionality * * @pkt: packet pointer + * @pci_dev: PCI device processing this packet * @max_frags: max tx ip fragments * @has_virt_hdr: device uses virtio header. */ -void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr); +void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, + uint32_t max_frags, bool has_virt_hdr); /** * Clean all tx packet resources. diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 33cd07da0c..16645e6c23 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -802,7 +802,9 @@ vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen) hwaddr daddr = vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring); - cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + pci_dma_read(PCI_DEVICE(s), daddr, + &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring); if (rxcd.gen != ring_gen) { @@ -1023,10 +1025,11 @@ nocsum: } static void -vmxnet3_physical_memory_writev(const struct iovec *iov, - size_t start_iov_off, - hwaddr target_addr, - size_t bytes_to_copy) +vmxnet3_pci_dma_writev(PCIDevice *pci_dev, + const struct iovec *iov, + size_t start_iov_off, + hwaddr target_addr, + size_t bytes_to_copy) { size_t curr_off = 0; size_t copied = 0; @@ -1036,9 +1039,9 @@ vmxnet3_physical_memory_writev(const struct iovec *iov, size_t chunk_len = MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy); - cpu_physical_memory_write(target_addr + copied, - iov->iov_base + start_iov_off - curr_off, - chunk_len); + pci_dma_write(pci_dev, target_addr + copied, + iov->iov_base + start_iov_off - curr_off, + chunk_len); copied += chunk_len; start_iov_off += chunk_len; @@ -1088,15 +1091,15 @@ vmxnet3_indicate_packet(VMXNET3State *s) } chunk_size = MIN(bytes_left, rxd.len); - vmxnet3_physical_memory_writev(data, bytes_copied, - le64_to_cpu(rxd.addr), chunk_size); + vmxnet3_pci_dma_writev(PCI_DEVICE(s), data, bytes_copied, + le64_to_cpu(rxd.addr), chunk_size); bytes_copied += chunk_size; bytes_left -= chunk_size; vmxnet3_dump_rx_descr(&rxd); if (ready_rxcd_pa != 0) { - cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + pci_dma_write(PCI_DEVICE(s), ready_rxcd_pa, &rxcd, sizeof(rxcd)); } memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc)); @@ -1127,7 +1130,8 @@ vmxnet3_indicate_packet(VMXNET3State *s) if (ready_rxcd_pa != 0) { rxcd.eop = 1; rxcd.err = (bytes_left != 0); - cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + + pci_dma_write(PCI_DEVICE(s), ready_rxcd_pa, &rxcd, sizeof(rxcd)); /* Flush RX descriptor changes */ smp_wmb(); @@ -1298,7 +1302,8 @@ static void vmxnet3_update_mcast_filters(VMXNET3State *s) VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.rxFilterConf.mfTablePA); - cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes); + pci_dma_read(PCI_DEVICE(s), mcast_list_pa, s->mcast_list, list_bytes); + VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len); for (i = 0; i < s->mcast_list_len; i++) { VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a)); @@ -1328,15 +1333,17 @@ static void vmxnet3_fill_stats(VMXNET3State *s) return; for (i = 0; i < s->txq_num; i++) { - cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa, - &s->txq_descr[i].txq_stats, - sizeof(s->txq_descr[i].txq_stats)); + pci_dma_write(PCI_DEVICE(s), + s->txq_descr[i].tx_stats_pa, + &s->txq_descr[i].txq_stats, + sizeof(s->txq_descr[i].txq_stats)); } for (i = 0; i < s->rxq_num; i++) { - cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa, - &s->rxq_descr[i].rxq_stats, - sizeof(s->rxq_descr[i].rxq_stats)); + pci_dma_write(PCI_DEVICE(s), + s->rxq_descr[i].rx_stats_pa, + &s->rxq_descr[i].rxq_stats, + sizeof(s->rxq_descr[i].rxq_stats)); } } @@ -1558,7 +1565,8 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Preallocate TX packet wrapper */ VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); - net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), + s->max_tx_frags, s->peer_has_vhdr); net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); /* Read rings memory locations for RX queues */ @@ -2536,7 +2544,8 @@ static int vmxnet3_post_load(void *opaque, int version_id) VMXNET3State *s = opaque; PCIDevice *d = PCI_DEVICE(s); - net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), + s->max_tx_frags, s->peer_has_vhdr); net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); if (s->msix_used) { -- cgit 1.4.1