Skip site navigation (1) | Skip section navigation (2)
Date:      Sun, 1 Sep 2013 04:33:47 +0000 (UTC)
From:      Bryan Venteicher <bryanv@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r255112 - in head: share/man/man4 sys/dev/virtio/network sys/modules/virtio/network
Message-ID:  <201309010433.r814Xl5r005360@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bryanv
Date: Sun Sep  1 04:33:47 2013
New Revision: 255112
URL: http://svnweb.freebsd.org/changeset/base/255112

Log:
  Import multiqueue VirtIO net driver from my user/bryanv/vtnetmq branch
  
  This is a significant rewrite of much of the previous driver; lots of
  misc. cleanup was also performed, and support for a few other minor
  features was also added.

Modified:
  head/share/man/man4/vtnet.4
  head/sys/dev/virtio/network/if_vtnet.c
  head/sys/dev/virtio/network/if_vtnetvar.h
  head/sys/modules/virtio/network/Makefile

Modified: head/share/man/man4/vtnet.4
==============================================================================
--- head/share/man/man4/vtnet.4	Sun Sep  1 04:23:54 2013	(r255111)
+++ head/share/man/man4/vtnet.4	Sun Sep  1 04:33:47 2013	(r255112)
@@ -69,14 +69,30 @@ prompt before booting the kernel or stor
 .Xr loader.conf 5 .
 .Bl -tag -width "xxxxxx"
 .It Va hw.vtnet.csum_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .csum_disable
 This tunable disables receive and send checksum offload.
 The default value is 0.
 .It Va hw.vtnet.tso_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .tso_disable
 This tunable disables TSO.
 The default value is 0.
 .It Va hw.vtnet.lro_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .lro_disable
 This tunable disables LRO.
 The default value is 0.
+.It Va hw.vtnet.mq_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .mq_disable
+This tunable disables multiqueue.
+The default value is 0.
+.It Va hw.vtnet.mq_max_pairs
+.It Va hw.vtnet. Ns Ar X Ns Va .mq_max_pairs
+This tunable sets the maximum number of transmit and receive queue pairs.
+Multiple queues are only supported when the Multiqueue feature is negotiated.
+This driver supports a maximum of 8 queue pairs.
+The number of queue pairs used is the smallest of the maximum supported by the
+driver and the hypervisor, the number of CPUs present in the guest, and this
+tunable if not zero.
+The default value is 0.
 .El
 .Sh SEE ALSO
 .Xr arp 4 ,

Modified: head/sys/dev/virtio/network/if_vtnet.c
==============================================================================
--- head/sys/dev/virtio/network/if_vtnet.c	Sun Sep  1 04:23:54 2013	(r255111)
+++ head/sys/dev/virtio/network/if_vtnet.c	Sun Sep  1 04:33:47 2013	(r255112)
@@ -29,10 +29,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include "opt_device_polling.h"
-#endif
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -46,6 +42,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/sglist.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/taskqueue.h>
+#include <sys/smp.h>
+#include <machine/smp.h>
 
 #include <vm/uma.h>
 
@@ -63,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
 #include <netinet/udp.h>
 #include <netinet/tcp.h>
 #include <netinet/sctp.h>
@@ -79,6 +79,9 @@ __FBSDID("$FreeBSD$");
 
 #include "virtio_if.h"
 
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
 static int	vtnet_modevent(module_t, int, void *);
 
 static int	vtnet_probe(device_t);
@@ -87,82 +90,139 @@ static int	vtnet_detach(device_t);
 static int	vtnet_suspend(device_t);
 static int	vtnet_resume(device_t);
 static int	vtnet_shutdown(device_t);
+static int	vtnet_attach_completed(device_t);
 static int	vtnet_config_change(device_t);
 
 static void	vtnet_negotiate_features(struct vtnet_softc *);
+static void	vtnet_setup_features(struct vtnet_softc *);
+static int	vtnet_init_rxq(struct vtnet_softc *, int);
+static int	vtnet_init_txq(struct vtnet_softc *, int);
+static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
+static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
+static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
+static void	vtnet_free_rx_filters(struct vtnet_softc *);
 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
-static void	vtnet_get_hwaddr(struct vtnet_softc *);
-static void	vtnet_set_hwaddr(struct vtnet_softc *);
-static int	vtnet_is_link_up(struct vtnet_softc *);
-static void	vtnet_update_link_status(struct vtnet_softc *);
-static void	vtnet_watchdog(struct vtnet_softc *);
+static int	vtnet_setup_interface(struct vtnet_softc *);
 static int	vtnet_change_mtu(struct vtnet_softc *, int);
 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
 
-static int	vtnet_init_rx_vq(struct vtnet_softc *);
-static void	vtnet_free_rx_mbufs(struct vtnet_softc *);
-static void	vtnet_free_tx_mbufs(struct vtnet_softc *);
-static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
-
-#ifdef DEVICE_POLLING
-static poll_handler_t vtnet_poll;
-#endif
-
-static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
-		    struct mbuf **);
-static int	vtnet_replace_rxbuf(struct vtnet_softc *,
+static int	vtnet_rxq_populate(struct vtnet_rxq *);
+static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
+static struct mbuf *
+		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
+static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
 		    struct mbuf *, int);
-static int	vtnet_newbuf(struct vtnet_softc *);
-static void	vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
-static void	vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
-static int	vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
-static void	vtnet_vlan_tag_remove(struct mbuf *);
-static int	vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
+static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
+static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
+static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
+static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
+		     struct virtio_net_hdr *);
+static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
+static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
+static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
+static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
 		    struct virtio_net_hdr *);
-static int	vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
-static int	vtnet_rxeof(struct vtnet_softc *, int, int *);
+static int	vtnet_rxq_eof(struct vtnet_rxq *);
 static void	vtnet_rx_vq_intr(void *);
+static void	vtnet_rxq_tq_intr(void *, int);
 
-static void	vtnet_txeof(struct vtnet_softc *);
-static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
+static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
+static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
+		    int *, int *, int *);
+static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
+		    int, struct virtio_net_hdr *);
+static struct mbuf *
+		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
 		    struct virtio_net_hdr *);
-static int	vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
+static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
 		    struct vtnet_tx_header *);
-static int	vtnet_encap(struct vtnet_softc *, struct mbuf **);
-static void	vtnet_start_locked(struct ifnet *);
+static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
+#ifdef VTNET_LEGACY_TX
+static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
 static void	vtnet_start(struct ifnet *);
-static void	vtnet_tick(void *);
+#else
+static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
+static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
+static void	vtnet_txq_tq_deferred(void *, int);
+#endif
+static void	vtnet_txq_tq_intr(void *, int);
+static void	vtnet_txq_eof(struct vtnet_txq *);
 static void	vtnet_tx_vq_intr(void *);
+static void	vtnet_tx_start_all(struct vtnet_softc *);
+
+#ifndef VTNET_LEGACY_TX
+static void	vtnet_qflush(struct ifnet *);
+#endif
+
+static int	vtnet_watchdog(struct vtnet_txq *);
+static void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
+		    struct vtnet_rxq_stats *);
+static void	vtnet_txq_accum_stats(struct vtnet_txq *,
+		    struct vtnet_txq_stats *);
+static void	vtnet_accumulate_stats(struct vtnet_softc *);
+static void	vtnet_tick(void *);
+
+static void	vtnet_start_taskqueues(struct vtnet_softc *);
+static void	vtnet_free_taskqueues(struct vtnet_softc *);
+static void	vtnet_drain_taskqueues(struct vtnet_softc *);
 
+static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
+static void	vtnet_stop_rendezvous(struct vtnet_softc *);
 static void	vtnet_stop(struct vtnet_softc *);
+static int	vtnet_virtio_reinit(struct vtnet_softc *);
+static void	vtnet_init_rx_filters(struct vtnet_softc *);
+static int	vtnet_init_rx_queues(struct vtnet_softc *);
+static int	vtnet_init_tx_queues(struct vtnet_softc *);
+static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
+static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
 static int	vtnet_reinit(struct vtnet_softc *);
 static void	vtnet_init_locked(struct vtnet_softc *);
 static void	vtnet_init(void *);
 
+static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
 		    struct sglist *, int, int);
-
-static void	vtnet_rx_filter(struct vtnet_softc *sc);
+static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
+static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
 static int	vtnet_set_promisc(struct vtnet_softc *, int);
 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
+static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
+static void	vtnet_rx_filter(struct vtnet_softc *);
 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
-
 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
-static void	vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t);
+static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
 
+static int	vtnet_is_link_up(struct vtnet_softc *);
+static void	vtnet_update_link_status(struct vtnet_softc *);
 static int	vtnet_ifmedia_upd(struct ifnet *);
 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static void	vtnet_get_hwaddr(struct vtnet_softc *);
+static void	vtnet_set_hwaddr(struct vtnet_softc *);
+static void	vtnet_vlan_tag_remove(struct mbuf *);
 
-static void	vtnet_add_statistics(struct vtnet_softc *);
+static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
+		    struct sysctl_oid_list *, struct vtnet_rxq *);
+static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
+		    struct sysctl_oid_list *, struct vtnet_txq *);
+static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
+static void	vtnet_setup_sysctl(struct vtnet_softc *);
+
+static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
+static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
+static int	vtnet_txq_enable_intr(struct vtnet_txq *);
+static void	vtnet_txq_disable_intr(struct vtnet_txq *);
+static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
+static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
+static void	vtnet_enable_interrupts(struct vtnet_softc *);
+static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
+static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
+static void	vtnet_disable_interrupts(struct vtnet_softc *);
 
-static int	vtnet_enable_rx_intr(struct vtnet_softc *);
-static int	vtnet_enable_tx_intr(struct vtnet_softc *);
-static void	vtnet_disable_rx_intr(struct vtnet_softc *);
-static void	vtnet_disable_tx_intr(struct vtnet_softc *);
+static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
 
 /* Tunables. */
 static int vtnet_csum_disable = 0;
@@ -171,16 +231,25 @@ static int vtnet_tso_disable = 0;
 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
 static int vtnet_lro_disable = 0;
 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
+static int vtnet_mq_disable = 0;
+TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
+static int vtnet_mq_max_pairs = 0;
+TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
+static int vtnet_rx_process_limit = 512;
+TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
 
 /*
- * Reducing the number of transmit completed interrupts can
- * improve performance. To do so, the define below keeps the
- * Tx vq interrupt disabled and adds calls to vtnet_txeof()
- * in the start and watchdog paths. The price to pay for this
- * is the m_free'ing of transmitted mbufs may be delayed until
- * the watchdog fires.
+ * Reducing the number of transmit completed interrupts can improve
+ * performance. To do so, the define below keeps the Tx vq interrupt
+ * disabled and adds calls to vtnet_txq_eof() in the start and watchdog
+ * paths. The price to pay for this is that the m_free'ing of transmitted
+ * mbufs may be delayed until the watchdog fires.
+ *
+ * BMV: Reintroduce this later as a run-time option, if it makes
+ * sense after the EVENT_IDX feature is supported.
+ *
+ * #define VTNET_TX_INTR_MODERATION
  */
-#define VTNET_TX_INTR_MODERATION
 
 static uma_zone_t vtnet_tx_header_zone;
 
@@ -203,21 +272,25 @@ static struct virtio_feature_desc vtnet_
 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
+	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
+	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
+	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
 
 	{ 0, NULL }
 };
 
 static device_method_t vtnet_methods[] = {
 	/* Device methods. */
-	DEVMETHOD(device_probe,		vtnet_probe),
-	DEVMETHOD(device_attach,	vtnet_attach),
-	DEVMETHOD(device_detach,	vtnet_detach),
-	DEVMETHOD(device_suspend,	vtnet_suspend),
-	DEVMETHOD(device_resume,	vtnet_resume),
-	DEVMETHOD(device_shutdown,	vtnet_shutdown),
+	DEVMETHOD(device_probe,			vtnet_probe),
+	DEVMETHOD(device_attach,		vtnet_attach),
+	DEVMETHOD(device_detach,		vtnet_detach),
+	DEVMETHOD(device_suspend,		vtnet_suspend),
+	DEVMETHOD(device_resume,		vtnet_resume),
+	DEVMETHOD(device_shutdown,		vtnet_shutdown),
 
 	/* VirtIO methods. */
-	DEVMETHOD(virtio_config_change, vtnet_config_change),
+	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
+	DEVMETHOD(virtio_config_change,		vtnet_config_change),
 
 	DEVMETHOD_END
 };
@@ -282,56 +355,31 @@ static int
 vtnet_attach(device_t dev)
 {
 	struct vtnet_softc *sc;
-	struct ifnet *ifp;
-	int tx_size, error;
+	int error;
 
 	sc = device_get_softc(dev);
 	sc->vtnet_dev = dev;
 
-	VTNET_LOCK_INIT(sc);
-	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_MTX(sc), 0);
-
-	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
-	    vtnet_ifmedia_sts);
-	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
-	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
-
-	vtnet_add_statistics(sc);
-
+	/* Register our feature descriptions. */
 	virtio_set_feature_desc(dev, vtnet_feature_desc);
-	vtnet_negotiate_features(sc);
-
-	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
-		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
-		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	} else
-		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
-
-	sc->vtnet_rx_mbuf_size = MCLBYTES;
-	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
-
-	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
-		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
 
-		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) {
-			sc->vtnet_mac_filter = malloc(
-			    sizeof(struct vtnet_mac_filter), M_DEVBUF,
-			    M_NOWAIT | M_ZERO);
-			if (sc->vtnet_mac_filter == NULL) {
-				device_printf(dev,
-				    "cannot allocate mac filter table\n");
-				error = ENOMEM;
-				goto fail;
-			}
+	VTNET_CORE_LOCK_INIT(sc);
+	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
 
-			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
-		}
+	vtnet_setup_sysctl(sc);
+	vtnet_setup_features(sc);
 
-		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
-			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
+	error = vtnet_alloc_rx_filters(sc);
+	if (error) {
+		device_printf(dev, "cannot allocate Rx filters\n");
+		goto fail;
 	}
 
-	vtnet_get_hwaddr(sc);
+	error = vtnet_alloc_rxtx_queues(sc);
+	if (error) {
+		device_printf(dev, "cannot allocate queues\n");
+		goto fail;
+	}
 
 	error = vtnet_alloc_virtqueues(sc);
 	if (error) {
@@ -339,111 +387,21 @@ vtnet_attach(device_t dev)
 		goto fail;
 	}
 
-	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
-	if (ifp == NULL) {
-		device_printf(dev, "cannot allocate ifnet structure\n");
-		error = ENOSPC;
+	error = vtnet_setup_interface(sc);
+	if (error) {
+		device_printf(dev, "cannot setup interface\n");
 		goto fail;
 	}
 
-	ifp->if_softc = sc;
-	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
-	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
-	ifp->if_init = vtnet_init;
-	ifp->if_start = vtnet_start;
-	ifp->if_ioctl = vtnet_ioctl;
-
-	sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq);
-	sc->vtnet_rx_process_limit = sc->vtnet_rx_size;
-
-	tx_size = virtqueue_size(sc->vtnet_tx_vq);
-	sc->vtnet_tx_size = tx_size;
-	IFQ_SET_MAXLEN(&ifp->if_snd, tx_size - 1);
-	ifp->if_snd.ifq_drv_maxlen = tx_size - 1;
-	IFQ_SET_READY(&ifp->if_snd);
-
-	ether_ifattach(ifp, sc->vtnet_hwaddr);
-
-	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
-		ifp->if_capabilities |= IFCAP_LINKSTATE;
-
-	/* Tell the upper layer(s) we support long frames. */
-	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
-	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
-
-	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
-		ifp->if_capabilities |= IFCAP_TXCSUM;
-
-		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
-			ifp->if_capabilities |= IFCAP_TSO4;
-		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
-			ifp->if_capabilities |= IFCAP_TSO6;
-		if (ifp->if_capabilities & IFCAP_TSO)
-			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
-
-		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
-			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
-	}
-
-	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
-		ifp->if_capabilities |= IFCAP_RXCSUM;
-
-		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
-		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
-			ifp->if_capabilities |= IFCAP_LRO;
-	}
-
-	if (ifp->if_capabilities & IFCAP_HWCSUM) {
-		/*
-		 * VirtIO does not support VLAN tagging, but we can fake
-		 * it by inserting and removing the 802.1Q header during
-		 * transmit and receive. We are then able to do checksum
-		 * offloading of VLAN frames.
-		 */
-		ifp->if_capabilities |=
-		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
-	}
-
-	ifp->if_capenable = ifp->if_capabilities;
-
-	/*
-	 * Capabilities after here are not enabled by default.
-	 */
-
-	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
-		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
-
-		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
-		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
-		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
-		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
-	}
-
-#ifdef DEVICE_POLLING
-	ifp->if_capabilities |= IFCAP_POLLING;
-#endif
-
 	error = virtio_setup_intr(dev, INTR_TYPE_NET);
 	if (error) {
 		device_printf(dev, "cannot setup virtqueue interrupts\n");
-		ether_ifdetach(ifp);
+		/* BMV: This will crash if it happens during boot! */
+		ether_ifdetach(sc->vtnet_ifp);
 		goto fail;
 	}
 
-	/*
-	 * Device defaults to promiscuous mode for backwards
-	 * compatibility. Turn it off if possible.
-	 */
-	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
-		VTNET_LOCK(sc);
-		if (vtnet_set_promisc(sc, 0) != 0) {
-			ifp->if_flags |= IFF_PROMISC;
-			device_printf(dev,
-			    "cannot disable promiscuous mode\n");
-		}
-		VTNET_UNLOCK(sc);
-	} else
-		ifp->if_flags |= IFF_PROMISC;
+	vtnet_start_taskqueues(sc);
 
 fail:
 	if (error)
@@ -461,24 +419,19 @@ vtnet_detach(device_t dev)
 	sc = device_get_softc(dev);
 	ifp = sc->vtnet_ifp;
 
-	KASSERT(mtx_initialized(VTNET_MTX(sc)),
-	    ("vtnet mutex not initialized"));
-
-#ifdef DEVICE_POLLING
-	if (ifp != NULL && ifp->if_capenable & IFCAP_POLLING)
-		ether_poll_deregister(ifp);
-#endif
-
 	if (device_is_attached(dev)) {
-		VTNET_LOCK(sc);
+		VTNET_CORE_LOCK(sc);
 		vtnet_stop(sc);
-		VTNET_UNLOCK(sc);
+		VTNET_CORE_UNLOCK(sc);
 
 		callout_drain(&sc->vtnet_tick_ch);
+		vtnet_drain_taskqueues(sc);
 
 		ether_ifdetach(ifp);
 	}
 
+	vtnet_free_taskqueues(sc);
+
 	if (sc->vtnet_vlan_attach != NULL) {
 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
 		sc->vtnet_vlan_attach = NULL;
@@ -488,25 +441,20 @@ vtnet_detach(device_t dev)
 		sc->vtnet_vlan_detach = NULL;
 	}
 
-	if (sc->vtnet_mac_filter != NULL) {
-		free(sc->vtnet_mac_filter, M_DEVBUF);
-		sc->vtnet_mac_filter = NULL;
-	}
+	ifmedia_removeall(&sc->vtnet_media);
 
 	if (ifp != NULL) {
 		if_free(ifp);
 		sc->vtnet_ifp = NULL;
 	}
 
-	if (sc->vtnet_rx_vq != NULL)
-		vtnet_free_rx_mbufs(sc);
-	if (sc->vtnet_tx_vq != NULL)
-		vtnet_free_tx_mbufs(sc);
+	vtnet_free_rxtx_queues(sc);
+	vtnet_free_rx_filters(sc);
+
 	if (sc->vtnet_ctrl_vq != NULL)
 		vtnet_free_ctrl_vq(sc);
 
-	ifmedia_removeall(&sc->vtnet_media);
-	VTNET_LOCK_DESTROY(sc);
+	VTNET_CORE_LOCK_DESTROY(sc);
 
 	return (0);
 }
@@ -518,10 +466,10 @@ vtnet_suspend(device_t dev)
 
 	sc = device_get_softc(dev);
 
-	VTNET_LOCK(sc);
+	VTNET_CORE_LOCK(sc);
 	vtnet_stop(sc);
 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
-	VTNET_UNLOCK(sc);
+	VTNET_CORE_UNLOCK(sc);
 
 	return (0);
 }
@@ -535,11 +483,11 @@ vtnet_resume(device_t dev)
 	sc = device_get_softc(dev);
 	ifp = sc->vtnet_ifp;
 
-	VTNET_LOCK(sc);
+	VTNET_CORE_LOCK(sc);
 	if (ifp->if_flags & IFF_UP)
 		vtnet_init_locked(sc);
 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
-	VTNET_UNLOCK(sc);
+	VTNET_CORE_UNLOCK(sc);
 
 	return (0);
 }
@@ -556,15 +504,26 @@ vtnet_shutdown(device_t dev)
 }
 
 static int
+vtnet_attach_completed(device_t dev)
+{
+
+	vtnet_attach_disable_promisc(device_get_softc(dev));
+
+	return (0);
+}
+
+static int
 vtnet_config_change(device_t dev)
 {
 	struct vtnet_softc *sc;
 
 	sc = device_get_softc(dev);
 
-	VTNET_LOCK(sc);
+	VTNET_CORE_LOCK(sc);
 	vtnet_update_link_status(sc);
-	VTNET_UNLOCK(sc);
+	if (sc->vtnet_link_active != 0)
+		vtnet_tx_start_all(sc);
+	VTNET_CORE_UNLOCK(sc);
 
 	return (0);
 }
@@ -578,188 +537,491 @@ vtnet_negotiate_features(struct vtnet_so
 	dev = sc->vtnet_dev;
 	mask = 0;
 
-	if (vtnet_csum_disable)
-		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
-
 	/*
-	 * TSO and LRO are only available when their corresponding
-	 * checksum offload feature is also negotiated.
+	 * TSO and LRO are only available when their corresponding checksum
+	 * offload feature is also negotiated.
 	 */
-
-	if (vtnet_csum_disable || vtnet_tso_disable)
-		mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
-		    VIRTIO_NET_F_HOST_ECN;
-
-	if (vtnet_csum_disable || vtnet_lro_disable)
+	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
+		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
+		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
+	}
+	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
+		mask |= VTNET_TSO_FEATURES;
+	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
 		mask |= VTNET_LRO_FEATURES;
+	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
+		mask |= VIRTIO_NET_F_MQ;
+#ifdef VTNET_LEGACY_TX
+	mask |= VIRTIO_NET_F_MQ;
+#endif
 
 	features = VTNET_FEATURES & ~mask;
-#ifdef VTNET_TX_INTR_MODERATION
-	features |= VIRTIO_F_NOTIFY_ON_EMPTY;
-#endif
 	sc->vtnet_features = virtio_negotiate_features(dev, features);
 
-	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0 &&
-	    virtio_with_feature(dev, VTNET_LRO_FEATURES)) {
-		/*
-		 * LRO without mergeable buffers requires special care. This
-		 * is not ideal because every receive buffer must be large
-		 * enough to hold the maximum TCP packet, the Ethernet header,
-		 * and the vtnet_rx_header. This requires up to 34 descriptors
-		 * when using MCLBYTES clusters. If we do not have indirect
-		 * descriptors, LRO is disabled since the virtqueue will not
-		 * be able to contain very many receive buffers.
-		 */
-		if (virtio_with_feature(dev,
-		    VIRTIO_RING_F_INDIRECT_DESC) == 0) {
-			device_printf(dev,
-			    "LRO disabled due to lack of both mergeable "
-			    "buffers and indirect descriptors\n");
+	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0)
+		return;
+	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
+		return;
 
-			sc->vtnet_features = virtio_negotiate_features(dev,
-			    features & ~VTNET_LRO_FEATURES);
-		} else
-			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
-	}
+	/*
+	 * LRO without mergeable buffers requires special care. This is not
+	 * ideal because every receive buffer must be large enough to hold
+	 * the maximum TCP packet, the Ethernet header, and the header. This
+	 * requires up to 34 descriptors with MCLBYTES clusters. If we do
+	 * not have indirect descriptors, LRO is disabled since the virtqueue
+	 * will not contain very many receive buffers.
+	 */
+	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+		device_printf(dev,
+		    "LRO disabled due to both mergeable buffers and indirect "
+		    "descriptors not negotiated\n");
+
+		features &= ~VTNET_LRO_FEATURES;
+		sc->vtnet_features = virtio_negotiate_features(dev, features);
+	} else
+		sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
 }
 
-static int
-vtnet_alloc_virtqueues(struct vtnet_softc *sc)
+static void
+vtnet_setup_features(struct vtnet_softc *sc)
 {
 	device_t dev;
-	struct vq_alloc_info vq_info[3];
-	int nvqs, rxsegs;
+	int max_pairs, max;
 
 	dev = sc->vtnet_dev;
-	nvqs = 2;
 
-	/*
-	 * Indirect descriptors are not needed for the Rx
-	 * virtqueue when mergeable buffers are negotiated.
-	 * The header is placed inline with the data, not
-	 * in a separate descriptor, and mbuf clusters are
-	 * always physically contiguous.
-	 */
-	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
-		rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ?
-		    VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
+	vtnet_negotiate_features(sc);
+
+	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
+		/* This feature should always be negotiated. */
+		sc->vtnet_flags |= VTNET_FLAG_MAC;
+	}
+
+	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
+		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
+		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	} else
-		rxsegs = 0;
+		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
 
-	VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs,
-	    vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
-	    "%s receive", device_get_nameunit(dev));
-
-	VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS,
-	    vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
-	    "%s transmit", device_get_nameunit(dev));
+	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
+		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
 
-	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
-		nvqs++;
+		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
+			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
+		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
+			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
+		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
+			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
+	}
 
-		VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
-		    &sc->vtnet_ctrl_vq, "%s control",
-		    device_get_nameunit(dev));
+	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
+	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
+		max_pairs = virtio_read_dev_config_2(dev,
+		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
+		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
+			max_pairs = 1;
+	} else
+		max_pairs = 1;
+
+	if (max_pairs > 1) {
+		/*
+		 * Limit the maximum number of queue pairs to the number of
+		 * CPUs or the configured maximum. The actual number of
+		 * queues that get used may be less.
+		 */
+		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
+		if (max > 0 && max_pairs > max)
+			max_pairs = max;
+		if (max_pairs > mp_ncpus)
+			max_pairs = mp_ncpus;
+		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
+			max_pairs = VTNET_MAX_QUEUE_PAIRS;
+		if (max_pairs > 1)
+			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
 	}
 
-	return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
+	sc->vtnet_max_vq_pairs = max_pairs;
 }
 
-static void
-vtnet_get_hwaddr(struct vtnet_softc *sc)
+static int
+vtnet_init_rxq(struct vtnet_softc *sc, int id)
 {
-	device_t dev;
+	struct vtnet_rxq *rxq;
 
-	dev = sc->vtnet_dev;
+	rxq = &sc->vtnet_rxqs[id];
 
-	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
-		virtio_read_device_config(dev,
-		    offsetof(struct virtio_net_config, mac),
-		    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
-	} else {
-		/* Generate random locally administered unicast address. */
-		sc->vtnet_hwaddr[0] = 0xB2;
-		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
+	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
+	    device_get_nameunit(sc->vtnet_dev), id);
+	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
 
-		vtnet_set_hwaddr(sc);
-	}
+	rxq->vtnrx_sc = sc;
+	rxq->vtnrx_id = id;
+
+	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
+	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
+	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
+
+	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
 }
 
-static void
-vtnet_set_hwaddr(struct vtnet_softc *sc)
+static int
+vtnet_init_txq(struct vtnet_softc *sc, int id)
 {
-	device_t dev;
+	struct vtnet_txq *txq;
 
-	dev = sc->vtnet_dev;
+	txq = &sc->vtnet_txqs[id];
 
-	virtio_write_device_config(dev,
-	    offsetof(struct virtio_net_config, mac),
-	    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
+	    device_get_nameunit(sc->vtnet_dev), id);
+	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
+
+	txq->vtntx_sc = sc;
+	txq->vtntx_id = id;
+
+#ifndef VTNET_LEGACY_TX
+	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
+	    M_NOWAIT, &txq->vtntx_mtx);
+	if (txq->vtntx_br == NULL)
+		return (ENOMEM);
+
+	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
+#endif
+	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
+	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
+	    taskqueue_thread_enqueue, &txq->vtntx_tq);
+	if (txq->vtntx_tq == NULL)
+		return (ENOMEM);
+
+	return (0);
 }
 
 static int
-vtnet_is_link_up(struct vtnet_softc *sc)
+vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
 {
-	device_t dev;
-	struct ifnet *ifp;
-	uint16_t status;
+	int i, npairs, error;
 
-	dev = sc->vtnet_dev;
-	ifp = sc->vtnet_ifp;
+	npairs = sc->vtnet_max_vq_pairs;
 
-	VTNET_LOCK_ASSERT(sc);
+	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
+		return (ENOMEM);
 
-	if ((ifp->if_capenable & IFCAP_LINKSTATE) == 0)
-		return (1);
+	for (i = 0; i < npairs; i++) {
+		error = vtnet_init_rxq(sc, i);
+		if (error)
+			return (error);
+		error = vtnet_init_txq(sc, i);
+		if (error)
+			return (error);
+	}
 
-	status = virtio_read_dev_config_2(dev,
-	    offsetof(struct virtio_net_config, status));
+	vtnet_setup_queue_sysctl(sc);
 
-	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
+	return (0);
 }
 
 static void
-vtnet_update_link_status(struct vtnet_softc *sc)
+vtnet_destroy_rxq(struct vtnet_rxq *rxq)
 {
-	struct ifnet *ifp;
-	int link;
-
-	ifp = sc->vtnet_ifp;
 
-	link = vtnet_is_link_up(sc);
+	rxq->vtnrx_sc = NULL;
+	rxq->vtnrx_id = -1;
 
-	if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
-		sc->vtnet_flags |= VTNET_FLAG_LINK;
-		if_link_state_change(ifp, LINK_STATE_UP);
-		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
-			vtnet_start_locked(ifp);
-	} else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
-		sc->vtnet_flags &= ~VTNET_FLAG_LINK;
-		if_link_state_change(ifp, LINK_STATE_DOWN);
-	}
+	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
+		mtx_destroy(&rxq->vtnrx_mtx);
 }
 
 static void
-vtnet_watchdog(struct vtnet_softc *sc)
+vtnet_destroy_txq(struct vtnet_txq *txq)
 {
-	struct ifnet *ifp;
 
-	ifp = sc->vtnet_ifp;
+	txq->vtntx_sc = NULL;
+	txq->vtntx_id = -1;
 
-#ifdef VTNET_TX_INTR_MODERATION
-	vtnet_txeof(sc);
+#ifndef VTNET_LEGACY_TX
+	if (txq->vtntx_br != NULL) {
+		buf_ring_free(txq->vtntx_br, M_DEVBUF);
+		txq->vtntx_br = NULL;
+	}
 #endif
 
-	if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
-		return;
+	if (mtx_initialized(&txq->vtntx_mtx) != 0)
+		mtx_destroy(&txq->vtntx_mtx);
+}
+
+static void
+vtnet_free_rxtx_queues(struct vtnet_softc *sc)
+{
+	int i;
+
+	if (sc->vtnet_rxqs != NULL) {
+		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
+			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
+		free(sc->vtnet_rxqs, M_DEVBUF);
+		sc->vtnet_rxqs = NULL;
+	}
+
+	if (sc->vtnet_txqs != NULL) {
+		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
+			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
+		free(sc->vtnet_txqs, M_DEVBUF);
+		sc->vtnet_txqs = NULL;
+	}
+}
+
+static int
+vtnet_alloc_rx_filters(struct vtnet_softc *sc)
+{
+
+	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
+		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
+		    M_DEVBUF, M_NOWAIT | M_ZERO);
+		if (sc->vtnet_mac_filter == NULL)
+			return (ENOMEM);
+	}
+
+	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
+		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
+		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201309010433.r814Xl5r005360>