Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 26 Jan 2012 16:35:09 +0000 (UTC)
From:      "Kenneth D. Merry" <ken@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r230587 - in head: share/man/man4 sys/dev/xen/blkback sys/dev/xen/netback sys/kern sys/xen/interface/io
Message-ID:  <201201261635.q0QGZ9mR096638@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ken
Date: Thu Jan 26 16:35:09 2012
New Revision: 230587
URL: http://svn.freebsd.org/changeset/base/230587

Log:
  Xen netback driver rewrite.
  
  share/man/man4/Makefile,
  share/man/man4/xnb.4,
  sys/dev/xen/netback/netback.c,
  sys/dev/xen/netback/netback_unit_tests.c:
  
  	Rewrote the netback driver for xen to attach properly via newbus
  	and work properly in both HVM and PVM mode (only HVM is tested).
  	Works with the in-tree FreeBSD netfront driver or the Windows
  	netfront driver from SuSE.  Has not been extensively tested with
  	a Linux netfront driver.  Does not implement LRO, TSO, or
  	polling.  Includes unit tests that may be run through sysctl
  	after compiling with XNB_DEBUG defined.
  
  sys/dev/xen/blkback/blkback.c,
  sys/xen/interface/io/netif.h:
  
  	Comment elaboration.
  
  sys/kern/uipc_mbuf.c:
  
  	Fix page fault in kernel mode when calling m_print() on a
  	null mbuf.  Since m_print() is only used for debugging, there
  	are no performance concerns for extra error checking code.
  
  sys/kern/subr_scanf.c:
  
  	Add the "hh" and "ll" width specifiers from C99 to scanf().
  	A few callers were already using "ll" even though scanf()
  	was handling it as "l".
  
  Submitted by:	Alan Somers <alans@spectralogic.com>
  Submitted by:	John Suykerbuyk <johns@spectralogic.com>
  Sponsored by:	Spectra Logic
  MFC after:	1 week
  Reviewed by:	ken

Added:
  head/share/man/man4/xnb.4   (contents, props changed)
  head/sys/dev/xen/netback/netback_unit_tests.c   (contents, props changed)
Modified:
  head/share/man/man4/Makefile
  head/sys/dev/xen/blkback/blkback.c
  head/sys/dev/xen/netback/netback.c
  head/sys/kern/subr_scanf.c
  head/sys/kern/uipc_mbuf.c
  head/sys/xen/interface/io/netif.h

Modified: head/share/man/man4/Makefile
==============================================================================
--- head/share/man/man4/Makefile	Thu Jan 26 15:23:45 2012	(r230586)
+++ head/share/man/man4/Makefile	Thu Jan 26 16:35:09 2012	(r230587)
@@ -531,6 +531,7 @@ MAN=	aac.4 \
 	${_xen.4} \
 	xhci.4 \
 	xl.4 \
+	${_xnb.4} \
 	xpt.4 \
 	zero.4 \
 	zyd.4
@@ -731,6 +732,7 @@ _urtw.4=	urtw.4
 _viawd.4=	viawd.4
 _wpi.4=		wpi.4
 _xen.4=		xen.4
+_xnb.4=		xnb.4
 
 MLINKS+=lindev.4 full.4
 .endif

Added: head/share/man/man4/xnb.4
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/share/man/man4/xnb.4	Thu Jan 26 16:35:09 2012	(r230587)
@@ -0,0 +1,134 @@
+.\" Copyright (c) 2012 Spectra Logic Corporation
+.\"	All rights reserved.
+.\"
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions, and the following disclaimer,
+.\"    without modification.
+.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer
+.\"    substantially similar to the "NO WARRANTY" disclaimer below
+.\"    ("Disclaimer") and any redistribution must be conditioned upon
+.\"    including a substantially similar Disclaimer requirement for further
+.\"    binary redistribution.
+.\" 
+.\" NO WARRANTY
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGES.
+.\" 
+.\" Authors: Alan Somers         (Spectra Logic Corporation)
+.\" 
+.\" $FreeBSD$
+.\"
+
+.Dd January 6, 2012
+.Dt XNB 4
+.Os 
+.Sh NAME
+.Nm xnb
+.Nd "Xen Paravirtualized Backend Ethernet Driver"
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following lines in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "options XENHVM"
+.Cd "device xenpci"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides the back half of a paravirtualized
+.Xr xen 4
+network connection.  The netback and netfront drivers appear to their
+respective operating systems as Ethernet devices linked by a crossover cable.
+Typically,
+.Nm
+will run on Domain 0 and the netfront driver will run on a guest domain.
+However, it is also possible to run
+.Nm
+on a guest domain.  It may be bridged or routed to provide the netfront's
+domain access to other guest domains or to a physical network.
+.Pp
+In most respects, the
+.Nm
+device appears to the OS as any other Ethernet device.  It can be configured at
+runtime entirely with
+.Xr ifconfig 8 .
+In particular, it supports MAC changing, arbitrary MTU sizes, checksum
+offload for IP, UDP, and TCP for both receive and transmit, and TSO.  However,
+see
+.Sx CAVEATS
+before enabling txcsum, rxcsum, or tso.
+.Sh SYSCTL VARIABLES
+The following read-only variables are available via
+.Xr sysctl 8 :
+.Bl -tag -width indent
+.It Va dev.xnb.%d.dump_rings
+Displays information about the ring buffers used to pass requests between the
+netfront and netback.  Mostly useful for debugging, but can also be used to
+get traffic statistics.
+.It Va dev.xnb.%d.unit_test_results
+Runs a builtin suite of unit tests and displays the results.  Does not affect
+the operation of the driver in any way.  Note that the test suite simulates
+error conditions; this will result in error messages being printed to the
+system log.
+.Sh CAVEATS
+Packets sent through Xennet pass over shared memory, so the protocol includes
+no form of link-layer checksum or CRC.  Furthermore, Xennet drivers always
+report to their hosts that they support receive and transmit checksum
+offloading.  They "offload" the checksum calculation by simply skipping it.
+That works fine for packets that are exchanged between two domains on the same
+machine.  However, when a Xennet interface is bridged to a physical interface,
+a correct checksum must be attached to any packets bound for that physical
+interface.  Currently, FreeBSD lacks any mechanism for an ethernet device to
+inform the OS that newly received packets are valid even though their checksums
+are not.  So if the netfront driver is configured to offload checksum
+calculations, it will pass non-checksummed packets to
+.Nm ,
+which must then calculate the checksum in software before passing the packet
+to the OS.
+.Pp
+For this reason, it is recommended that if
+.Nm
+is bridged to a physical interface, then transmit checksum offloading should be
+disabled on the netfront.  The Xennet protocol does not have any mechanism for
+the netback to request the netfront to do this; the operator must do it
+manually.
+.Sh SEE ALSO
+.Xr arp 4 ,
+.Xr netintro 4 ,
+.Xr ng_ether 4 ,
+.Xr ifconfig 8 ,
+.Xr xen 4
+.Sh HISTORY
+The
+.Nm
+device driver first appeared in
+.Fx 10.0 .
+.
+.Sh AUTHORS
+The
+.Nm
+driver was written by
+.An Alan Somers
+.Aq alans@spectralogic.com
+and
+.An John Suykerbuyk
+.Aq johns@spectralogic.com
+.Sh BUGS
+The
+.Nm
+driver does not properly checksum UDP datagrams that span more than one
+Ethernet frame.  Nor does it correctly checksum IPv6 packets.  To work around
+that bug, disable transmit checksum offloading on the netfront driver.

Modified: head/sys/dev/xen/blkback/blkback.c
==============================================================================
--- head/sys/dev/xen/blkback/blkback.c	Thu Jan 26 15:23:45 2012	(r230586)
+++ head/sys/dev/xen/blkback/blkback.c	Thu Jan 26 16:35:09 2012	(r230587)
@@ -3434,6 +3434,10 @@ xbb_shutdown(struct xbb_softc *xbb)
 
 	DPRINTF("\n");
 
+	/*
+	 * Before unlocking mutex, set this flag to prevent other threads from
+	 * getting into this function
+	 */
 	xbb->flags |= XBBF_IN_SHUTDOWN;
 	mtx_unlock(&xbb->lock);
 

Modified: head/sys/dev/xen/netback/netback.c
==============================================================================
--- head/sys/dev/xen/netback/netback.c	Thu Jan 26 15:23:45 2012	(r230586)
+++ head/sys/dev/xen/netback/netback.c	Thu Jan 26 16:35:09 2012	(r230587)
@@ -1,1595 +1,2535 @@
-/*
- * Copyright (c) 2006, Cisco Systems, Inc.
+/*-
+ * Copyright (c) 2009-2011 Spectra Logic Corporation
  * All rights reserved.
  *
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
  * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
  *
- * 1. Redistributions of source code must retain the above copyright 
- *    notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright 
- *    notice, this list of conditions and the following disclaimer in the 
- *    documentation and/or other materials provided with the distribution. 
- * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 
- *    may be used to endorse or promote products derived from this software 
- *    without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
- * POSSIBILITY OF SUCH DAMAGE.
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *          Alan Somers         (Spectra Logic Corporation)
+ *          John Suykerbuyk     (Spectra Logic Corporation)
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
+
+/**
+ * \file netback.c
+ *
+ * \brief Device driver supporting the vending of network access
+ * 	  from this FreeBSD domain to other domains.
+ */
+#include "opt_inet.h"
+#include "opt_global.h"
+
 #include "opt_sctp.h"
 
 #include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
 #include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
 
-#include <sys/module.h>
 #include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
-#include <net/if_types.h>
 #include <net/ethernet.h>
-#include <net/if_bridgevar.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
 
-#include <netinet/in_systm.h>
 #include <netinet/in.h>
-#include <netinet/in_var.h>
 #include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#if __FreeBSD_version >= 700000
 #include <netinet/tcp.h>
-#include <netinet/udp.h>
-#ifdef SCTP
-#include <netinet/sctp.h>
-#include <netinet/sctp_crc32.h>
 #endif
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <machine/in_cksum.h>
 
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
 
-#include <machine/in_cksum.h>
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen_intr.h>
-#include <machine/evtchn.h>
-#include <machine/xenbus.h>
-#include <machine/gnttab.h>
-#include <machine/xen-public/memory.h>
-#include <dev/xen/xenbus/xenbus_comms.h>
+#include <machine/_inttypes.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+
+#include <xen/evtchn.h>
+#include <xen/xen_intr.h>
+#include <xen/interface/io/netif.h>
+#include <xen/xenbus/xenbusvar.h>
+
+/*--------------------------- Compile-time Tunables --------------------------*/
 
+/*---------------------------------- Macros ----------------------------------*/
+/**
+ * Custom malloc type for all driver allocations.
+ */
+static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data");
 
-#ifdef XEN_NETBACK_DEBUG
-#define DPRINTF(fmt, args...) \
-    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define	XNB_SG	1	/* netback driver supports feature-sg */
+#define	XNB_GSO_TCPV4 1	/* netback driver supports feature-gso-tcpv4 */
+#define	XNB_RX_COPY 1	/* netback driver supports feature-rx-copy */
+#define	XNB_RX_FLIP 0	/* netback driver does not support feature-rx-flip */
+
+#undef XNB_DEBUG
+#define	XNB_DEBUG /* hardcode on during development */
+
+#ifdef XNB_DEBUG
+#define	DPRINTF(fmt, args...) \
+	printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
 #else
-#define DPRINTF(fmt, args...) ((void)0)
+#define	DPRINTF(fmt, args...) do {} while (0)
 #endif
 
-#ifdef XEN_NETBACK_DEBUG_LOTS
-#define DDPRINTF(fmt, args...) \
-    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
-#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
-#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
-#else
-#define DDPRINTF(fmt, args...) ((void)0)
-#define DPRINTF_MBUF(_m) ((void)0)
-#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+/* Default length for stack-allocated grant tables */
+#define	GNTTAB_LEN	(64)
+
+/* Features supported by all backends.  TSO and LRO can be negotiated */
+#define	XNB_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)
+
+#define	NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define	NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+/**
+ * Two argument version of the standard macro.  Second argument is a tentative
+ * value of req_cons
+ */
+#define	RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({                     \
+	unsigned int req = (_r)->sring->req_prod - cons;          	\
+	unsigned int rsp = RING_SIZE(_r) -                              \
+	(cons - (_r)->rsp_prod_pvt);                          		\
+	req < rsp ? req : rsp;                                          \
+})
+
+#define	virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+#define	virt_to_offset(x) ((x) & (PAGE_SIZE - 1))
+
+/**
+ * Predefined array type of grant table copy descriptors.  Used to pass around
+ * statically allocated memory structures.
+ */
+typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN];
+
+/*--------------------------- Forward Declarations ---------------------------*/
+struct xnb_softc;
+struct xnb_pkt;
+
+static void	xnb_attach_failed(struct xnb_softc *xnb,
+				  int err, const char *fmt, ...)
+				  __printflike(3,4);
+static int	xnb_shutdown(struct xnb_softc *xnb);
+static int	create_netdev(device_t dev);
+static int	xnb_detach(device_t dev);
+static int	xen_net_read_mac(device_t dev, uint8_t mac[]);
+static int	xnb_ifmedia_upd(struct ifnet *ifp);
+static void	xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
+static void 	xnb_intr(void *arg);
+static int	xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend,
+			 const struct mbuf *mbufc, gnttab_copy_table gnttab);
+static int	xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend,
+			 struct mbuf **mbufc, struct ifnet *ifnet,
+			 gnttab_copy_table gnttab);
+static int	xnb_ring2pkt(struct xnb_pkt *pkt,
+			     const netif_tx_back_ring_t *tx_ring,
+			     RING_IDX start);
+static void	xnb_txpkt2rsp(const struct xnb_pkt *pkt,
+			      netif_tx_back_ring_t *ring, int error);
+static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp);
+static int	xnb_txpkt2gnttab(const struct xnb_pkt *pkt,
+				 const struct mbuf *mbufc,
+				 gnttab_copy_table gnttab,
+				 const netif_tx_back_ring_t *txb,
+				 domid_t otherend_id);
+static void	xnb_update_mbufc(struct mbuf *mbufc,
+				 const gnttab_copy_table gnttab, int n_entries);
+static int	xnb_mbufc2pkt(const struct mbuf *mbufc,
+			      struct xnb_pkt *pkt,
+			      RING_IDX start, int space);
+static int	xnb_rxpkt2gnttab(const struct xnb_pkt *pkt,
+				 const struct mbuf *mbufc,
+				 gnttab_copy_table gnttab,
+				 const netif_rx_back_ring_t *rxb,
+				 domid_t otherend_id);
+static int	xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
+			      const gnttab_copy_table gnttab, int n_entries,
+			      netif_rx_back_ring_t *ring);
+static void	xnb_add_mbuf_cksum(struct mbuf *mbufc);
+static void	xnb_stop(struct xnb_softc*);
+static int	xnb_ioctl(struct ifnet*, u_long, caddr_t);
+static void	xnb_start_locked(struct ifnet*);
+static void	xnb_start(struct ifnet*);
+static void	xnb_ifinit_locked(struct xnb_softc*);
+static void	xnb_ifinit(void*);
+#ifdef XNB_DEBUG
+static int	xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
+static int	xnb_dump_rings(SYSCTL_HANDLER_ARGS);
 #endif
+/*------------------------------ Data Structures -----------------------------*/
+
+
+/**
+ * Representation of a xennet packet.  Simplified version of a packet as
+ * stored in the Xen tx ring.  Applicable to both RX and TX packets
+ */
+struct xnb_pkt{
+	/**
+	 * Array index of the first data-bearing (eg, not extra info) entry
+	 * for this packet
+	 */
+	RING_IDX	car;
+
+	/**
+	 * Array index of the second data-bearing entry for this packet.
+	 * Invalid if the packet has only one data-bearing entry.  If the
+	 * packet has more than two data-bearing entries, then the second
+	 * through the last will be sequential modulo the ring size
+	 */
+	RING_IDX	cdr;
 
-#define WPRINTF(fmt, args...) \
-    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+	/**
+	 * Optional extra info.  Only valid if flags contains
+	 * NETTXF_extra_info.  Note that extra.type will always be
+	 * XEN_NETIF_EXTRA_TYPE_GSO.  Currently, no known netfront or netback
+	 * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_*
+	 */
+	netif_extra_info_t extra;
+
+	/** Size of entire packet in bytes.       */
+	uint16_t	size;
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-#define BUG_ON PANIC_IF
+	/** The size of the first entry's data in bytes */
+	uint16_t	car_size;
 
-#define IFNAME(_np) (_np)->ifp->if_xname
+	/**
+	 * Either NETTXF_ or NETRXF_ flags.  Note that the flag values are
+	 * not the same for TX and RX packets
+	 */
+	uint16_t	flags;
 
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+	/**
+	 * The number of valid data-bearing entries (either netif_tx_request's
+	 * or netif_rx_response's) in the packet.  If this is 0, it means the
+	 * entire packet is invalid.
+	 */
+	uint16_t	list_len;
 
-struct ring_ref {
-	vm_offset_t va;
-	grant_handle_t handle;
-	uint64_t bus_addr;
+	/** There was an error processing the packet */
+	uint8_t		error;
 };
 
-typedef struct netback_info {
+/** xnb_pkt method: initialize it */
+static inline void
+xnb_pkt_initialize(struct xnb_pkt *pxnb)
+{
+	bzero(pxnb, sizeof(*pxnb));
+}
 
-	/* Schedule lists */
-	STAILQ_ENTRY(netback_info) next_tx;
-	STAILQ_ENTRY(netback_info) next_rx;
-	int on_tx_sched_list;
-	int on_rx_sched_list;
-
-	struct xenbus_device *xdev;
-	XenbusState frontend_state;
-
-	domid_t domid;
-	int handle;
-	char *bridge;
-
-	int rings_connected;
-	struct ring_ref tx_ring_ref;
-	struct ring_ref rx_ring_ref;
-	netif_tx_back_ring_t tx;
-	netif_rx_back_ring_t rx;
-	evtchn_port_t evtchn;
-	int irq;
-	void *irq_cookie;
+/** xnb_pkt method: mark the packet as valid */
+static inline void
+xnb_pkt_validate(struct xnb_pkt *pxnb)
+{
+	pxnb->error = 0;
+};
 
-	struct ifnet *ifp;
-	int ref_cnt;
+/** xnb_pkt method: mark the packet as invalid */
+static inline void
+xnb_pkt_invalidate(struct xnb_pkt *pxnb)
+{
+	pxnb->error = 1;
+};
 
-	device_t ndev;
-	int attached;
-} netif_t;
-
-
-#define MAX_PENDING_REQS 256
-#define PKT_PROT_LEN 64
-
-static struct {
-	netif_tx_request_t req;
-	netif_t *netif;
-} pending_tx_info[MAX_PENDING_REQS];
-static uint16_t pending_ring[MAX_PENDING_REQS];
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static unsigned long mmap_vstart;
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-
-/* Freed TX mbufs get batched on this ring before return to pending_ring. */
-static uint16_t dealloc_ring[MAX_PENDING_REQS];
-static PEND_RING_IDX dealloc_prod, dealloc_cons;
-
-static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
-
-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-
-static struct task net_tx_task, net_rx_task;
-static struct callout rx_task_callout;
-
-static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
-	STAILQ_HEAD_INITIALIZER(tx_sched_list);
-static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
-	STAILQ_HEAD_INITIALIZER(rx_sched_list);
-static struct mtx tx_sched_list_lock;
-static struct mtx rx_sched_list_lock;
-
-static int vif_unit_maker = 0;
-
-/* Protos */
-static void netback_start(struct ifnet *ifp);
-static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int vif_add_dev(struct xenbus_device *xdev);
-static void disconnect_rings(netif_t *netif);
+/** xnb_pkt method: Check whether the packet is valid */
+static inline int
+xnb_pkt_is_valid(const struct xnb_pkt *pxnb)
+{
+	return (! pxnb->error);
+}
+
+#ifdef XNB_DEBUG
+/** xnb_pkt method: print the packet's contents in human-readable format*/
+static void __unused
+xnb_dump_pkt(const struct xnb_pkt *pkt) {
+	if (pkt == NULL) {
+	  DPRINTF("Was passed a null pointer.\n");
+	  return;
+	}
+	DPRINTF("pkt address= %p\n", pkt);
+	DPRINTF("pkt->size=%d\n", pkt->size);
+	DPRINTF("pkt->car_size=%d\n", pkt->car_size);
+	DPRINTF("pkt->flags=0x%04x\n", pkt->flags);
+	DPRINTF("pkt->list_len=%d\n", pkt->list_len);
+	/* DPRINTF("pkt->extra");	TODO */
+	DPRINTF("pkt->car=%d\n", pkt->car);
+	DPRINTF("pkt->cdr=%d\n", pkt->cdr);
+	DPRINTF("pkt->error=%d\n", pkt->error);
+}
+#endif /* XNB_DEBUG */
 
-#ifdef XEN_NETBACK_DEBUG_LOTS
-/* Debug code to display the contents of an mbuf */
 static void
-print_mbuf(struct mbuf *m, int max)
+xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq)
 {
-	int i, j=0;
-	printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
-	for (; m; m = m->m_next) {
-		unsigned char *d = m->m_data;
-		for (i=0; i < m->m_len; i++) {
-			if (max && j == max)
-				break;
-			if ((j++ % 16) == 0)
-				printf("\n%04x:", j);
-			printf(" %02x", d[i]);
-		}
+	if (txreq != NULL) {
+		DPRINTF("netif_tx_request index =%u\n", idx);
+		DPRINTF("netif_tx_request.gref  =%u\n", txreq->gref);
+		DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset);
+		DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags);
+		DPRINTF("netif_tx_request.id    =%hu\n", txreq->id);
+		DPRINTF("netif_tx_request.size  =%hu\n", txreq->size);
 	}
-	printf("\n");
 }
-#endif
 
 
-#define MAX_MFN_ALLOC 64
-static unsigned long mfn_list[MAX_MFN_ALLOC];
-static unsigned int alloc_index = 0;
+/**
+ * \brief Configuration data for a shared memory request ring
+ *        used to communicate with the front-end client of this
+ *        driver.
+ */
+struct xnb_ring_config {
+	/**
+	 * Runtime structures for ring access.  Unfortunately, TX and RX rings
+	 * use different data structures, and that cannot be changed since it
+	 * is part of the interdomain protocol.
+	 */
+	union{
+		netif_rx_back_ring_t	  rx_ring;
+		netif_tx_back_ring_t	  tx_ring;
+	} back_ring;
+
+	/**
+	 * The device bus address returned by the hypervisor when
+	 * mapping the ring and required to unmap it when a connection
+	 * is torn down.
+	 */
+	uint64_t	bus_addr;
 
-static unsigned long
-alloc_mfn(void)
-{
-	unsigned long mfn = 0;
-	struct xen_memory_reservation reservation = {
-		.extent_start = mfn_list,
-		.nr_extents   = MAX_MFN_ALLOC,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-	if ( unlikely(alloc_index == 0) )
-		alloc_index = HYPERVISOR_memory_op(
-			XENMEM_increase_reservation, &reservation);
-	if ( alloc_index != 0 )
-		mfn = mfn_list[--alloc_index];
-	return mfn;
-}
+	/** The pseudo-physical address where ring memory is mapped.*/
+	uint64_t	gnt_addr;
 
-static unsigned long
-alloc_empty_page_range(unsigned long nr_pages)
+	/** KVA address where ring memory is mapped. */
+	vm_offset_t	va;
+
+	/**
+	 * Grant table handles, one per-ring page, returned by the
+	 * hypervisor upon mapping of the ring and required to
+	 * unmap it when a connection is torn down.
+	 */
+	grant_handle_t	handle;
+
+	/** The number of ring pages mapped for the current connection. */
+	unsigned	ring_pages;
+
+	/**
+	 * The grant references, one per-ring page, supplied by the
+	 * front-end, allowing us to reference the ring pages in the
+	 * front-end's domain and to map these pages into our own domain.
+	 */
+	grant_ref_t	ring_ref;
+};
+
+/**
+ * Per-instance connection state flags.
+ */
+typedef enum
 {
-	void *pages;
-	int i = 0, j = 0;
-	multicall_entry_t mcl[17];
-	unsigned long mfn_list[16];
-	struct xen_memory_reservation reservation = {
-		.extent_start = mfn_list,
-		.nr_extents   = 0,
-		.address_bits = 0,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
+	/** Communication with the front-end has been established. */
+	XNBF_RING_CONNECTED    = 0x01,
+
+	/**
+	 * Front-end requests exist in the ring and are waiting for
+	 * xnb_xen_req objects to free up.
+	 */
+	XNBF_RESOURCE_SHORTAGE = 0x02,
 
-	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
-	if (pages == NULL)
-		return 0;
+	/** Connection teardown has started. */
+	XNBF_SHUTDOWN          = 0x04,
 
-	memset(mcl, 0, sizeof(mcl));
+	/** A thread is already performing shutdown processing. */
+	XNBF_IN_SHUTDOWN       = 0x08
+} xnb_flag_t;
 
-	while (i < nr_pages) {
-		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+/**
+ * Types of rings.  Used for array indices and to identify a ring's control
+ * data structure type
+ */
+typedef enum{
+	XNB_RING_TYPE_TX = 0,	/* ID of TX rings, used for array indices */
+	XNB_RING_TYPE_RX = 1,	/* ID of RX rings, used for array indices */
+	XNB_NUM_RING_TYPES
+} xnb_ring_type_t;
 
-		mcl[j].op = __HYPERVISOR_update_va_mapping;
-		mcl[j].args[0] = va;
+/**
+ * Per-instance configuration data.
+ */
+struct xnb_softc {
+	/** NewBus device corresponding to this instance. */
+	device_t		dev;
+
+	/* Media related fields */
+
+	/** Generic network media state */
+	struct ifmedia		sc_media;
+
+	/** Media carrier info */
+	struct ifnet 		*xnb_ifp;
+
+	/** Our own private carrier state */
+	unsigned carrier;
+
+	/** Device MAC Address */
+	uint8_t			mac[ETHER_ADDR_LEN];
+
+	/* Xen related fields */
+
+	/**
+	 * \brief The netif protocol abi in effect.
+	 *
+	 * There are situations where the back and front ends can
+	 * have a different, native abi (e.g. intel x86_64 and
+	 * 32bit x86 domains on the same machine).  The back-end
+	 * always accommodates the front-end's native abi.  That
+	 * value is pulled from the XenStore and recorded here.
+	 */
+	int			abi;
 
-		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+	/**
+	 * Name of the bridge to which this VIF is connected, if any
+	 * This field is dynamically allocated by xenbus and must be free()ed
+	 * when no longer needed
+	 */
+	char			*bridge;
 
-		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+	/** The interrupt driven event channel used to signal ring events. */
+	evtchn_port_t		evtchn;
 
-		if (j == 16 || i == nr_pages) {
-			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+	/** Xen device handle.*/
+	long 			handle;
 
-			reservation.nr_extents = j;
+	/** IRQ mapping for the communication ring event channel. */
+	int			irq;
+
+	/**
+	 * \brief Cached value of the front-end's domain id.
+	 *
+	 * This value is used at once for each mapped page in
+	 * a transaction.  We cache it to avoid incurring the
+	 * cost of an ivar access every time this is needed.
+	 */
+	domid_t			otherend_id;
 
-			mcl[j].op = __HYPERVISOR_memory_op;
-			mcl[j].args[0] = XENMEM_decrease_reservation;
-			mcl[j].args[1] =  (unsigned long)&reservation;
-			
-			(void)HYPERVISOR_multicall(mcl, j+1);
+	/**
+	 * Undocumented frontend feature.  Has something to do with
+	 * scatter/gather IO
+	 */
+	uint8_t			can_sg;
+	/** Undocumented frontend feature */
+	uint8_t			gso;
+	/** Undocumented frontend feature */
+	uint8_t			gso_prefix;
+	/** Can checksum TCP/UDP over IPv4 */
+	uint8_t			ip_csum;
+
+	/* Implementation related fields */
+	/**
+	 * Preallocated grant table copy descriptor for RX operations.
+	 * Access must be protected by rx_lock
+	 */
+	gnttab_copy_table	rx_gnttab;
 
-			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
-			j = 0;
-		}
-	}
+	/**
+	 * Preallocated grant table copy descriptor for TX operations.
+	 * Access must be protected by tx_lock
+	 */
+	gnttab_copy_table	tx_gnttab;
 
-	return (unsigned long)pages;
-}
+#ifdef XENHVM
+	/**
+	 * Resource representing allocated physical address space
+	 * associated with our per-instance kva region.
+	 */
+	struct resource		*pseudo_phys_res;
 
-#ifdef XEN_NETBACK_FIXUP_CSUM
-static void
-fixup_checksum(struct mbuf *m)
-{
-	struct ether_header *eh = mtod(m, struct ether_header *);
-	struct ip *ip = (struct ip *)(eh + 1);
-	int iphlen = ip->ip_hl << 2;
-	int iplen = ntohs(ip->ip_len);
-
-	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
-		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
-		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
-			htons(IPPROTO_TCP + (iplen - iphlen)));
-		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
-		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
-#ifdef SCTP
-	} else if (sw_csum & CSUM_SCTP) {
-		sctp_delayed_cksum(m, iphlen);
-		sw_csum &= ~CSUM_SCTP;
-#endif
-	} else {
-		u_short csum;
-		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
-		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
-			htons(IPPROTO_UDP + (iplen - iphlen)));
-		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
-			csum = 0xffff;
-		uh->uh_sum = csum;
-		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
-	}
-}
+	/** Resource id for allocated physical address space. */
+	int			pseudo_phys_res_id;
 #endif
 
-/* Add the interface to the specified bridge */
-static int
-add_to_bridge(struct ifnet *ifp, char *bridge)
-{
-	struct ifdrv ifd;
-	struct ifbreq ifb;
-	struct ifnet *ifp_bridge = ifunit(bridge);
+	/** Ring mapping and interrupt configuration data. */
+	struct xnb_ring_config	ring_configs[XNB_NUM_RING_TYPES];
 
-	if (!ifp_bridge)
-		return ENOENT;
+	/**
+	 * Global pool of kva used for mapping remote domain ring
+	 * and I/O transaction data.
+	 */
+	vm_offset_t		kva;
 
-	bzero(&ifd, sizeof(ifd));
-	bzero(&ifb, sizeof(ifb));
+	/** Pseudo-physical address corresponding to kva. */
+	uint64_t		gnt_base_addr;
 
-	strcpy(ifb.ifbr_ifsname, ifp->if_xname);
-	strcpy(ifd.ifd_name, ifp->if_xname);
-	ifd.ifd_cmd = BRDGADD;
-	ifd.ifd_len = sizeof(ifb);
-	ifd.ifd_data = &ifb;
+	/** Various configuration and state bit flags. */
+	xnb_flag_t		flags;
 
-	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
-	
-}
+	/** Mutex protecting per-instance data in the receive path. */
+	struct mtx		rx_lock;
 
-static int
-netif_create(int handle, struct xenbus_device *xdev, char *bridge)
-{
-	netif_t *netif;
-	struct ifnet *ifp;
+	/** Mutex protecting per-instance data in the softc structure. */
+	struct mtx		sc_lock;
 
-	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
-	if (!netif)
-		return ENOMEM;
+	/** Mutex protecting per-instance data in the transmit path. */
+	struct mtx		tx_lock;
 
-	netif->ref_cnt = 1;
-	netif->handle = handle;
-	netif->domid = xdev->otherend_id;
-	netif->xdev = xdev;
-	netif->bridge = bridge;
-	xdev->data = netif;
-
-	/* Set up ifnet structure */
-	ifp = netif->ifp = if_alloc(IFT_ETHER);
-	if (!ifp) {
-		if (bridge)
-			free(bridge, M_DEVBUF);
-		free(netif, M_DEVBUF);
-		return ENOMEM;
-	}
+	/** The size of the global kva pool. */
+	int			kva_size;
+};
 
-	ifp->if_softc = netif;
-	if_initname(ifp, "vif",
-		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
-	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
-	ifp->if_output = ether_output;
-	ifp->if_start = netback_start;
-	ifp->if_ioctl = netback_ioctl;
-	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
-	
-	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+/*---------------------------- Debugging functions ---------------------------*/
+#ifdef XNB_DEBUG
+static void __unused
+xnb_dump_gnttab_copy(const struct gnttab_copy *entry)
+{
+	if (entry == NULL) {
+		printf("NULL grant table pointer\n");
+		return;
+	}
 
-	return 0;
+	if (entry->flags & GNTCOPY_dest_gref)
+		printf("gnttab dest ref=\t%u\n", entry->dest.u.ref);
+	else

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201201261635.q0QGZ9mR096638>