Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 14 Feb 2012 22:49:35 +0000 (UTC)
From:      Luigi Rizzo <luigi@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r231717 - in stable/8: share/man/man4 sys/conf sys/dev/netmap sys/net tools/tools/netmap
Message-ID:  <201202142249.q1EMnZ6S022850@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: luigi
Date: Tue Feb 14 22:49:34 2012
New Revision: 231717
URL: http://svn.freebsd.org/changeset/base/231717

Log:
  MFC: bring in the core netmap code (disconnected
  from the build). As usual, device driver patches
  will come separately.

Added:
  stable/8/share/man/man4/netmap.4   (contents, props changed)
  stable/8/sys/dev/netmap/
  stable/8/sys/dev/netmap/if_em_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/if_igb_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/if_lem_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/if_re_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/ixgbe_netmap.h   (contents, props changed)
  stable/8/sys/dev/netmap/netmap.c   (contents, props changed)
  stable/8/sys/dev/netmap/netmap_kern.h   (contents, props changed)
  stable/8/sys/net/netmap.h   (contents, props changed)
  stable/8/sys/net/netmap_user.h   (contents, props changed)
  stable/8/tools/tools/netmap/
  stable/8/tools/tools/netmap/Makefile   (contents, props changed)
  stable/8/tools/tools/netmap/README   (contents, props changed)
  stable/8/tools/tools/netmap/bridge.c   (contents, props changed)
  stable/8/tools/tools/netmap/click-test.cfg   (contents, props changed)
  stable/8/tools/tools/netmap/pcap.c   (contents, props changed)
  stable/8/tools/tools/netmap/pkt-gen.c   (contents, props changed)
Modified:
  stable/8/share/man/man4/Makefile
  stable/8/sys/conf/NOTES
  stable/8/sys/conf/files
  stable/8/sys/conf/options

Modified: stable/8/share/man/man4/Makefile
==============================================================================
--- stable/8/share/man/man4/Makefile	Tue Feb 14 22:27:43 2012	(r231716)
+++ stable/8/share/man/man4/Makefile	Tue Feb 14 22:49:34 2012	(r231717)
@@ -246,6 +246,7 @@ MAN=	aac.4 \
 	net80211.4 \
 	netgraph.4 \
 	netintro.4 \
+	netmap.4 \
 	${_nfe.4} \
 	${_nfsmb.4} \
 	ng_async.4 \

Added: stable/8/share/man/man4/netmap.4
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/8/share/man/man4/netmap.4	Tue Feb 14 22:49:34 2012	(r231717)
@@ -0,0 +1,299 @@
+.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\" 
+.\" This document is derived in part from the enet man page (enet.4)
+.\" distributed with 4.3BSD Unix.
+.\"
+.\" $FreeBSD$
+.\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
+.\"
+.Dd November 16, 2011
+.Dt NETMAP 4
+.Os
+.Sh NAME
+.Nm netmap
+.Nd a framework for fast packet I/O
+.Sh SYNOPSIS
+.Cd device netmap
+.Sh DESCRIPTION
+.Nm
+is a framework for fast and safe access to network devices
+(reaching 14.88 Mpps at less than 1 GHz).
+.Nm
+uses memory mapped buffers and metadata
+(buffer indexes and lengths) to communicate with the kernel,
+which is in charge of validating information through 
+.Pa ioctl()
+and
+.Pa select()/poll().
+.Nm
+can exploit the parallelism in multiqueue devices and
+multicore systems.
+.Pp
+.Pp
+.Nm
+requires explicit support in device drivers.
+For a list of supported devices, see the end of this manual page.
+.Sh OPERATION
+.Nm
+clients must first call
+.Pa open("/dev/netmap") ,
+and then issue an
+.Pa ioctl(...,NIOCREGIF,...)
+to bind the file descriptor to a network device.
+.Pp
+When a device is put in
+.Nm
+mode, its data path is disconnected from the host stack.
+The processes owning the file descriptor 
+can exchange packets with the device, or with the host stack,
+through an mmapped memory region that contains pre-allocated
+buffers and metadata.
+.Pp
+Non blocking I/O is done with special
+.Pa ioctl()'s ,
+whereas the file descriptor can be passed to
+.Pa select()/poll()
+to be notified about incoming packets or available transmit buffers.
+.Ss Data structures
+All data structures for all devices in
+.Nm
+mode are in a memory
+region shared by the kernel and all processes
+who open
+.Pa /dev/netmap
+(NOTE: visibility may be restricted in future implementations).
+All references between the shared data structures
+are relative (offsets or indexes). Some macros help convert
+them into actual pointers.
+.Pp
+The data structures in shared memory are the following:
+.Pp
+.Bl -tag -width XXX
+.It Dv struct netmap_if (one per interface)
+indicates the number of rings supported by an interface, their
+sizes, and the offsets of the
+.Pa netmap_rings
+associated to the interface.
+The offset of a
+.Pa struct netmap_if
+in the shared memory region is indicated by the
+.Pa nr_offset
+field in the structure returned by the
+.Pa NIOCREGIF
+ioctl (see below).
+.Bd -literal
+struct netmap_if {
+    char ni_name[IFNAMSIZ]; /* name of the interface. */
+    const u_int ni_num_queues; /* number of hw ring pairs */
+    const ssize_t   ring_ofs[]; /* offset of tx and rx rings */
+};
+.Ed
+.It Dv struct netmap_ring (one per ring)
+contains the index of the current read or write slot (cur),
+the number of slots available for reception or transmission (avail),
+and an array of
+.Pa slots
+describing the buffers.
+There is one ring pair for each of the N hardware ring pairs
+supported by the card (numbered 0..N-1), plus
+one ring pair (numbered N) for packets from/to the host stack.
+.Bd -literal
+struct netmap_ring {
+    const ssize_t buf_ofs;
+    const uint32_t num_slots; /* number of slots in the ring. */
+    uint32_t avail; /* number of usable slots */
+    uint32_t cur; /* 'current' index for the user side */
+
+    const uint16_t nr_buf_size;
+    uint16_t flags;
+    struct netmap_slot slot[0]; /* array of slots. */
+};
+.Ed
+.It Dv struct netmap_slot (one per packet)
+contains the metadata for a packet: a buffer index (buf_idx),
+a buffer length (len), and some flags.
+.Bd -literal
+struct netmap_slot {
+    uint32_t buf_idx; /* buffer index */
+    uint16_t len;   /* packet length */
+    uint16_t flags; /* buf changed, etc. */
+#define NS_BUF_CHANGED  0x0001  /* must resync, buffer changed */
+#define NS_REPORT       0x0002  /* tell hw to report results
+                                 * e.g. by generating an interrupt
+                                 */
+};
+.Ed
+.It Dv packet buffers
+are fixed size (approximately 2k) buffers allocated by the kernel
+that contain packet data. Buffer addresses are computed through
+macros.
+.El
+.Pp
+Some macros support the access to objects in the shared memory
+region. In particular:
+.Bd -literal
+struct netmap_if *nifp;
+struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
+struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
+int i = txring->slot[txring->cur].buf_idx;
+char *buf = NETMAP_BUF(txring, i);
+.Ed
+.Ss IOCTLS
+.Pp
+.Nm
+supports some ioctl() calls to synchronize the state of the rings
+between the kernel and the user processes, plus some
+to query and configure the interface.
+The former do not require any argument, whereas the latter
+use a
+.Pa struct nmreq
+defined as follows:
+.Bd -literal
+struct nmreq {
+        char      nr_name[IFNAMSIZ];
+        uint32_t  nr_offset;      /* nifp offset in the shared region */
+        uint32_t  nr_memsize;     /* size of the shared region */
+        uint32_t  nr_numdescs;    /* descriptors per queue */
+        uint16_t  nr_numqueues;
+        uint16_t  nr_ringid;      /* ring(s) we care about */
+#define NETMAP_HW_RING  0x4000    /* low bits indicate one hw ring */
+#define NETMAP_SW_RING  0x2000    /* we process the sw ring */
+#define NETMAP_NO_TX_POLL 0x1000  /* no gratuitous txsync on poll */
+#define NETMAP_RING_MASK 0xfff    /* the actual ring number */
+};
+
+.Ed
+A device descriptor obtained through
+.Pa /dev/netmap
+also supports the ioctl supported by network devices.
+.Pp
+The netmap-specific
+.Xr ioctl 2
+command codes below are defined in
+.In net/netmap.h
+and are:
+.Bl -tag -width XXXX
+.It Dv NIOCGINFO
+returns information about the interface named in nr_name.
+On return, nr_memsize indicates the size of the shared netmap
+memory region (this is device-independent),
+nr_numslots indicates how many buffers are in a ring,
+nr_numrings indicates the number of rings supported by the hardware.
+.Pp
+If the device does not support netmap, the ioctl returns EINVAL.
+.It Dv NIOCREGIF
+puts the interface named in nr_name into netmap mode, disconnecting
+it from the host stack, and/or defines which rings are controlled
+through this file descriptor.
+On return, it gives the same info as NIOCGINFO, and nr_ringid
+indicates the identity of the rings controlled through the file
+descriptor.
+.Pp
+Possible values for nr_ringid are
+.Bl -tag -width XXXXX
+.It 0
+default, all hardware rings
+.It NETMAP_SW_RING
+the ``host rings'' connecting to the host stack
+.It NETMAP_HW_RING + i
+the i-th hardware ring
+.El
+By default, a
+.Nm poll
+or
+.Nm select
+call pushes out any pending packets on the transmit ring, even if
+no write events are specified.
+The feature can be disabled by or-ing
+.Nm NETMAP_NO_TX_POLL
+to nr_ringid.
+But normally you should keep this feature unless you are using
+separate file descriptors for the send and receive rings, because
+otherwise packets are pushed out only if NIOCTXSYNC is called,
+or the send queue is full.
+.Pp
+.Pa NIOCREGIF
+can be used multiple times to change the association of a
+file descriptor to a ring pair, always within the same device.
+.It Dv NIOCUNREGIF
+brings an interface back to normal mode.
+.It Dv NIOCTXSYNC
+tells the hardware of new packets to transmit, and updates the
+number of slots available for transmission.
+.It Dv NIOCRXSYNC
+tells the hardware of consumed packets, and asks for newly available
+packets.
+.El
+.Ss SYSTEM CALLS
+.Nm
+uses
+.Nm select
+and
+.Nm poll
+to wake up processes when significant events occur.
+.Sh EXAMPLES
+The following code implements a traffic generator
+.Pp
+.Bd -literal -compact
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+struct netmap_if *nifp;
+struct netmap_ring *ring;
+struct nmreq nmr;
+
+fd = open("/dev/netmap", O_RDWR);
+bzero(&nmr, sizeof(nmr));
+strcpy(nmr.nr_name, "ix0");
+ioctl(fd, NIOCREGIF, &nmr);
+p = mmap(0, nmr.nr_memsize, fd);
+nifp = NETMAP_IF(p, nmr.nr_offset);
+ring = NETMAP_TXRING(nifp, 0);
+fds.fd = fd;
+fds.events = POLLOUT;
+for (;;) {
+    poll(&fds, 1, -1);
+    while (ring->avail-- > 0) {
+        i = ring->cur;
+        buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+        ... prepare packet in buf ...
+        ring->slot[i].len = ... packet length ...
+        ring->cur = NETMAP_RING_NEXT(ring, i);
+    }
+}
+.Ed
+.Sh SUPPORTED INTERFACES
+.Nm
+supports the following interfaces:
+.Xr em 4 ,
+.Xr ixgbe 4 ,
+.Xr re 4 .
+.Sh AUTHORS
+The
+.Nm
+framework has been designed and implemented by
+.An Luigi Rizzo
+and
+.An Matteo Landi
+in 2011 at the Universita` di Pisa.

Modified: stable/8/sys/conf/NOTES
==============================================================================
--- stable/8/sys/conf/NOTES	Tue Feb 14 22:27:43 2012	(r231716)
+++ stable/8/sys/conf/NOTES	Tue Feb 14 22:49:34 2012	(r231717)
@@ -780,6 +780,12 @@ device		sppp
 #  simultaneous BPF clients programs runnable.  DHCP requires bpf.
 device		bpf
 
+#  The `netmap' device implements memory-mapped access to network
+#  devices from userspace, enabling wire-speed packet capture and
+#  generation even at 10Gbit/s. Requires support in the device
+#  driver. Supported drivers are ixgbe, e1000, re.
+device		netmap
+
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing and benchmarking purposes.

Modified: stable/8/sys/conf/files
==============================================================================
--- stable/8/sys/conf/files	Tue Feb 14 22:27:43 2012	(r231716)
+++ stable/8/sys/conf/files	Tue Feb 14 22:49:34 2012	(r231717)
@@ -1385,6 +1385,7 @@ dev/mxge/mxge_rss_ethp_z8e.c	optional mx
 dev/my/if_my.c			optional my
 dev/ncv/ncr53c500.c		optional ncv
 dev/ncv/ncr53c500_pccard.c	optional ncv pccard
+dev/netmap/netmap.c		optional netmap
 dev/nge/if_nge.c		optional nge
 dev/nxge/if_nxge.c		optional nxge
 dev/nxge/xgehal/xgehal-device.c	optional nxge

Modified: stable/8/sys/conf/options
==============================================================================
--- stable/8/sys/conf/options	Tue Feb 14 22:27:43 2012	(r231716)
+++ stable/8/sys/conf/options	Tue Feb 14 22:49:34 2012	(r231717)
@@ -680,6 +680,7 @@ ISAPNP			opt_isa.h
 
 # various 'device presence' options.
 DEV_BPF			opt_bpf.h
+DEV_NETMAP		opt_global.h
 DEV_MCA			opt_mca.h
 DEV_CARP		opt_carp.h
 DEV_PTY			opt_tty.h

Added: stable/8/sys/dev/netmap/if_em_netmap.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/8/sys/dev/netmap/if_em_netmap.h	Tue Feb 14 22:49:34 2012	(r231717)
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ *
+ * netmap changes for if_em.
+ *
+ * For structure and details on the individual functions please see
+ * ixgbe_netmap.h
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>    /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static void	em_netmap_block_tasks(struct adapter *);
+static void	em_netmap_unblock_tasks(struct adapter *);
+static int	em_netmap_reg(struct ifnet *, int onoff);
+static int	em_netmap_txsync(struct ifnet *, u_int, int);
+static int	em_netmap_rxsync(struct ifnet *, u_int, int);
+static void	em_netmap_lock_wrapper(struct ifnet *, int, u_int);
+
+static void
+em_netmap_attach(struct adapter *adapter)
+{
+	/*
+	 * Describe this adapter to netmap: start from an all-zero
+	 * descriptor, fill in the interface, the ring sizes and the
+	 * driver callbacks, then pass it to netmap_attach() together
+	 * with the number of queues.
+	 */
+	struct netmap_adapter desc;
+
+	bzero(&desc, sizeof(desc));
+
+	/* interface and ring geometry */
+	desc.ifp = adapter->ifp;
+	desc.num_tx_desc = adapter->num_tx_desc;
+	desc.num_rx_desc = adapter->num_rx_desc;
+	desc.separate_locks = 1;
+
+	/* driver entry points used by the generic code */
+	desc.nm_register = em_netmap_reg;
+	desc.nm_lock = em_netmap_lock_wrapper;
+	desc.nm_txsync = em_netmap_txsync;
+	desc.nm_rxsync = em_netmap_rxsync;
+
+	netmap_attach(&desc, adapter->num_queues);
+}
+
+
+/*
+ * Lock dispatcher exported to the generic netmap code.
+ *
+ * 'what' selects one of the NETMAP_*_LOCK / NETMAP_*_UNLOCK requests and
+ * is mapped onto the corresponding driver lock macro; 'queueid' picks
+ * the tx or rx ring for the per-ring requests.  Any other value of
+ * 'what' is silently ignored.
+ */
+static void
+em_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
+{
+	struct adapter *sc = ifp->if_softc;
+
+	ASSERT(queueid < sc->num_queues);
+
+	switch (what) {
+	/* adapter-wide lock */
+	case NETMAP_CORE_LOCK:
+		EM_CORE_LOCK(sc);
+		return;
+	case NETMAP_CORE_UNLOCK:
+		EM_CORE_UNLOCK(sc);
+		return;
+
+	/* per-ring transmit lock */
+	case NETMAP_TX_LOCK:
+		EM_TX_LOCK(&sc->tx_rings[queueid]);
+		return;
+	case NETMAP_TX_UNLOCK:
+		EM_TX_UNLOCK(&sc->tx_rings[queueid]);
+		return;
+
+	/* per-ring receive lock */
+	case NETMAP_RX_LOCK:
+		EM_RX_LOCK(&sc->rx_rings[queueid]);
+		return;
+	case NETMAP_RX_UNLOCK:
+		EM_RX_UNLOCK(&sc->rx_rings[queueid]);
+		return;
+	}
+}
+
+
+/*
+ * Block the driver taskqueues and drain the tasks queued on them.
+ * XXX do we need to block/unblock the tasks ?
+ */
+static void
+em_netmap_block_tasks(struct adapter *adapter)
+{
+	int q;
+
+	if (adapter->msix <= 1) {	/* legacy */
+		taskqueue_block(adapter->tq);
+		taskqueue_drain(adapter->tq, &adapter->link_task);
+		taskqueue_drain(adapter->tq, &adapter->que_task);
+		return;
+	}
+
+	/* MSIX: every queue has its own tx and rx taskqueue */
+	for (q = 0; q < adapter->num_queues; q++) {
+		struct tx_ring *txr = &adapter->tx_rings[q];
+		struct rx_ring *rxr = &adapter->rx_rings[q];
+
+		taskqueue_block(txr->tq);
+		taskqueue_drain(txr->tq, &txr->tx_task);
+		taskqueue_block(rxr->tq);
+		taskqueue_drain(rxr->tq, &rxr->rx_task);
+	}
+}
+
+
+/*
+ * Undo em_netmap_block_tasks(): unblock the taskqueues so that the
+ * driver tasks can run again.
+ *
+ * In MSIX mode (adapter->msix > 1) the tx and rx taskqueue of every
+ * queue is unblocked; otherwise only the single adapter taskqueue is.
+ */
+static void
+em_netmap_unblock_tasks(struct adapter *adapter)
+{
+	if (adapter->msix > 1) {
+		struct tx_ring *txr = adapter->tx_rings;
+		struct rx_ring *rxr = adapter->rx_rings;
+		int i;
+
+		/*
+		 * Advance txr/rxr together with i, as the blocking side
+		 * does; otherwise only queue 0 would ever be unblocked.
+		 */
+		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+			taskqueue_unblock(txr->tq);
+			taskqueue_unblock(rxr->tq);
+		}
+	} else { /* legacy */
+		taskqueue_unblock(adapter->tq);
+	}
+}
+
+/*
+ * register-unregister routine.
+ *
+ * Switches the interface into (onoff != 0) or out of (onoff == 0)
+ * netmap mode.  In both directions interrupts are disabled, the
+ * interface is marked as not running, the driver tasks are blocked
+ * while the adapter is reinitialized with em_init_locked(), and the
+ * tasks are unblocked again before returning.
+ *
+ * Returns 0 on success, EINVAL if the interface has no netmap adapter,
+ * or ENOMEM if neither IFF_DRV_RUNNING nor IFF_DRV_OACTIVE is set after
+ * the reinit; in the latter case the netmap changes are rolled back
+ * through the unregister path before returning.
+ */
+static int
+em_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct netmap_adapter *na = NA(ifp);
+	int error = 0;
+
+	if (na == NULL)
+		return EINVAL;	/* no netmap support here */
+
+	em_disable_intr(adapter);
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	em_netmap_block_tasks(adapter);
+
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+
+		/* save if_transmit so the unregister path can restore it */
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_start;
+
+		em_init_locked(adapter);
+		/*
+		 * The flags were cleared above, so finding both still
+		 * clear is taken to mean that the reinit failed.
+		 */
+		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
+			error = ENOMEM;
+			goto fail;
+		}
+	} else {
+		/*
+		 * The label sits inside the else branch on purpose: a
+		 * failed register shares the code of a plain unregister.
+		 */
+fail:
+		/* restore if_transmit */
+		ifp->if_transmit = na->if_transmit;
+		ifp->if_capenable &= ~IFCAP_NETMAP;
+		em_init_locked(adapter);	/* also enable intr */
+	}
+	em_netmap_unblock_tasks(adapter);
+	return (error);
+}
+
+/*
+ * Reconcile hardware and user view of the transmit ring.
+ *
+ * ifp      interface whose softc holds the adapter
+ * ring_nr  index of the tx ring to synchronize
+ * do_lock  if non-zero, take and release EM_TX_LOCK around the work
+ *
+ * First, every slot between kring->nr_hwcur and ring->cur is turned
+ * into a NIC tx descriptor and the tail register (TDT) is advanced.
+ * Then, if nothing was queued or no slot is left, the head register
+ * (TDH) is read to account for completed transmissions.  Finally
+ * ring->avail is refreshed from kring->nr_hwavail.
+ *
+ * Returns 0 on success, or the result of netmap_ring_reinit() when the
+ * ring contents (cur, buffer index or length) are not acceptable.
+ */
+static int
+em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+
+	/* generate an interrupt approximately every half ring */
+	int report_frequency = kring->nkr_num_slots >> 1;
+
+	/*
+	 * ring->cur lives in the shared ring and is stored here in a
+	 * signed int.  A value that turns out negative would pass a
+	 * bare 'k > lim' test, and the send loop below (where j only
+	 * moves within 0..lim) could then never reach it, so reject
+	 * negative values as well.
+	 */
+	k = ring->cur;
+	if (k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		EM_TX_LOCK(txr);
+	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+			BUS_DMASYNC_POSTREAD);
+
+	/* check for new packets to send.
+	 * j indexes the netmap ring, l indexes the nic ring, and
+	 *	j = kring->nr_hwcur, l = E1000_TDT (not tracked),
+	 *	j == (l + kring->nkr_hwofs) % ring_size
+	 */
+	j = kring->nr_hwcur;
+	if (j != k) {	/* we have packets to send */
+		l = j - kring->nkr_hwofs;
+		if (l < 0)
+			l += lim + 1;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct e1000_tx_desc *curr = &txr->tx_base[l];
+			struct em_buffer *txbuf = &txr->tx_buffers[l];
+			/*
+			 * ask for a status report when the slot requests
+			 * it, and at slot 0 and at the middle of the ring
+			 */
+			int flags = ((slot->flags & NS_REPORT) ||
+				j == 0 || j == report_frequency) ?
+					E1000_TXD_CMD_RS : 0;
+			uint64_t paddr;
+			void *addr = PNMB(slot, &paddr);
+			int len = slot->len;
+			/* bad buffer or oversized packet: reset the ring */
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				if (do_lock)
+					EM_TX_UNLOCK(txr);
+				return netmap_ring_reinit(kring);
+			}
+
+			slot->flags &= ~NS_REPORT;
+			curr->upper.data = 0;
+			curr->lower.data = 
+			    htole32(adapter->txd_cmd | len |
+				(E1000_TXD_CMD_EOP | flags) );
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->buffer_addr = htole64(paddr);
+				/* buffer has changed, reload map */
+				netmap_reload_map(txr->txtag, txbuf->map, addr);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(txr->txtag, txbuf->map,
+				BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			l = (l == lim) ? 0 : l + 1;
+			n++;
+		}
+		kring->nr_hwcur = k;
+
+		/* decrease avail by number of sent packets */
+		kring->nr_hwavail -= n;
+
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		/* hand the new descriptors to the NIC by moving the tail */
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+	}
+
+	if (n == 0 || kring->nr_hwavail < 1) {
+		int delta;
+
+		/* record completed transmissions using TDH. */
+		l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+		if (l >= kring->nkr_num_slots) { /* XXX can happen */
+			D("TDH wrap %d", l);
+			l -= kring->nkr_num_slots;
+		}
+		delta = l - txr->next_to_clean;
+		if (delta) {
+			/* some completed, increment hwavail. */
+			if (delta < 0)
+				delta += kring->nkr_num_slots;
+			txr->next_to_clean = l;
+			kring->nr_hwavail += delta;
+		}
+	}
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	if (do_lock)
+		EM_TX_UNLOCK(txr);
+	return 0;
+}
+
+/*
+ * Reconcile kernel and user view of the receive ring.
+ *
+ * ifp      interface whose softc holds the adapter
+ * ring_nr  index of the rx ring to synchronize
+ * do_lock  if non-zero, take and release EM_RX_LOCK around the work
+ *
+ * First, descriptors that the NIC has marked done (E1000_RXD_STAT_DD)
+ * are imported: their length is copied into the netmap slots and
+ * kring->nr_hwavail grows accordingly.  Then the slots between
+ * kring->nr_hwcur and ring->cur, which userspace has released, are
+ * handed back to the NIC and the tail register (RDT) is advanced.
+ * Finally ring->avail is refreshed from kring->nr_hwavail.
+ *
+ * Returns 0 on success, or the result of netmap_ring_reinit() when the
+ * ring contents (cur or a buffer index) are not acceptable.
+ */
+static int
+em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, l, n, lim = kring->nkr_num_slots - 1;
+
+	/*
+	 * ring->cur lives in the shared ring and is stored here in a
+	 * signed int.  A value that turns out negative would pass a
+	 * bare 'k > lim' test, and the release loop below (where j only
+	 * moves within 0..lim) could then never reach it, so reject
+	 * negative values as well.
+	 */
+	k = ring->cur;
+	if (k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+ 
+	if (do_lock)
+		EM_RX_LOCK(rxr);
+	/* XXX check sync modes */
+	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/* import newly received packets into the netmap ring.
+	 * j is an index in the netmap ring, l in the NIC ring, and
+	 *	j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+	 *	l = rxr->next_to_check;
+	 * and
+	 *	j == (l + kring->nkr_hwofs) % ring_size
+	 */
+	l = rxr->next_to_check;
+	j = l + kring->nkr_hwofs;
+	/* here nkr_hwofs can be negative so must check for j < 0 */
+	if (j < 0)
+		j += lim + 1;
+	else if (j > lim)
+		j -= lim + 1;
+	for (n = 0; ; n++) {
+		struct e1000_rx_desc *curr = &rxr->rx_base[l];
+
+		/* stop at the first descriptor the NIC has not completed */
+		if ((curr->status & E1000_RXD_STAT_DD) == 0)
+			break;
+		ring->slot[j].len = le16toh(curr->length);
+		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
+			BUS_DMASYNC_POSTREAD);
+		j = (j == lim) ? 0 : j + 1;
+		/* make sure next_to_refresh follows next_to_check */
+		rxr->next_to_refresh = l;	// XXX
+		l = (l == lim) ? 0 : l + 1;
+	}
+	if (n) {
+		rxr->next_to_check = l;
+		kring->nr_hwavail += n;
+	}
+
+	/* skip past packets that userspace has already processed */
+	j = kring->nr_hwcur;
+	if (j != k) { /* userspace has read some packets. */
+		n = 0;
+		l = j - kring->nkr_hwofs; /* NIC ring index */
+		/* here nkr_hwofs can be negative so check for l > lim */
+		if (l < 0)
+			l += lim + 1;
+		else if (l > lim)
+			l -= lim + 1;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct e1000_rx_desc *curr = &rxr->rx_base[l];
+			struct em_buffer *rxbuf = &rxr->rx_buffers[l];
+			uint64_t paddr;
+			void *addr = PNMB(slot, &paddr);
+
+			if (addr == netmap_buffer_base) { /* bad buf */
+				if (do_lock)
+					EM_RX_UNLOCK(rxr);
+				return netmap_ring_reinit(kring);
+			}
+
+			/* clear the status so the descriptor reads as not done */
+			curr->status = 0;
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->buffer_addr = htole64(paddr);
+				/* buffer has changed, reload map */
+				netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
+			    BUS_DMASYNC_PREREAD);
+
+			j = (j == lim) ? 0 : j + 1;
+			l = (l == lim) ? 0 : l + 1;
+			n++;
+		}
+		kring->nr_hwavail -= n;
+		kring->nr_hwcur = k;
+		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		/*
+		 * IMPORTANT: we must leave one free slot in the ring,
+		 * so move l back by one unit
+		 */
+		l = (l == 0) ? lim : l - 1;
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
+	}
+	/* tell userspace that there are new packets */
+	ring->avail = kring->nr_hwavail ;
+	if (do_lock)
+		EM_RX_UNLOCK(rxr);
+	return 0;
+}

Added: stable/8/sys/dev/netmap/if_igb_netmap.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/8/sys/dev/netmap/if_igb_netmap.h	Tue Feb 14 22:49:34 2012	(r231717)
@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2011 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $
+ *
+ * netmap modifications for igb
+ * contributed by Ahmed Kooli
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>    /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static int	igb_netmap_reg(struct ifnet *, int onoff);
+static int	igb_netmap_txsync(struct ifnet *, u_int, int);
+static int	igb_netmap_rxsync(struct ifnet *, u_int, int);
+static void	igb_netmap_lock_wrapper(struct ifnet *, int, u_int);
+
+
+static void
+igb_netmap_attach(struct adapter *adapter)
+{
+	/*
+	 * Describe this adapter to netmap: start from an all-zero
+	 * descriptor, fill in the interface, the ring sizes and the
+	 * driver callbacks, then pass it to netmap_attach() together
+	 * with the number of queues.
+	 */
+	struct netmap_adapter desc;
+
+	bzero(&desc, sizeof(desc));
+
+	/* interface and ring geometry */
+	desc.ifp = adapter->ifp;
+	desc.num_tx_desc = adapter->num_tx_desc;
+	desc.num_rx_desc = adapter->num_rx_desc;
+	desc.separate_locks = 1;
+
+	/* driver entry points used by the generic code */
+	desc.nm_register = igb_netmap_reg;
+	desc.nm_lock = igb_netmap_lock_wrapper;
+	desc.nm_txsync = igb_netmap_txsync;
+	desc.nm_rxsync = igb_netmap_rxsync;
+
+	netmap_attach(&desc, adapter->num_queues);
+}
+
+
+/*
+ * Lock dispatcher exported to the generic netmap code.
+ *
+ * 'what' selects one of the NETMAP_*_LOCK / NETMAP_*_UNLOCK requests and
+ * is mapped onto the corresponding driver lock macro; 'queueid' picks
+ * the tx or rx ring for the per-ring requests.  Any other value of
+ * 'what' is silently ignored.
+ */
+static void
+igb_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int queueid)
+{
+	struct adapter *sc = ifp->if_softc;
+
+	ASSERT(queueid < sc->num_queues);
+
+	switch (what) {
+	/* adapter-wide lock */
+	case NETMAP_CORE_LOCK:
+		IGB_CORE_LOCK(sc);
+		return;
+	case NETMAP_CORE_UNLOCK:
+		IGB_CORE_UNLOCK(sc);
+		return;
+
+	/* per-ring transmit lock */
+	case NETMAP_TX_LOCK:
+		IGB_TX_LOCK(&sc->tx_rings[queueid]);
+		return;
+	case NETMAP_TX_UNLOCK:
+		IGB_TX_UNLOCK(&sc->tx_rings[queueid]);
+		return;
+
+	/* per-ring receive lock */
+	case NETMAP_RX_LOCK:
+		IGB_RX_LOCK(&sc->rx_rings[queueid]);
+		return;
+	case NETMAP_RX_UNLOCK:
+		IGB_RX_UNLOCK(&sc->rx_rings[queueid]);
+		return;
+	}
+}
+
+
+/*
+ * support for netmap register/unregister. We are already under core lock.
+ * only called on the first init or the last unregister.
+ *
+ * Switches the interface into (onoff != 0) or out of (onoff == 0)
+ * netmap mode.  In both directions interrupts are disabled, the
+ * interface is marked as not running, and the adapter is reinitialized
+ * with igb_init_locked().
+ *
+ * Returns 0 on success, EINVAL if the interface has no netmap adapter,
+ * or ENOMEM if neither IFF_DRV_RUNNING nor IFF_DRV_OACTIVE is set after
+ * the reinit; in the latter case the netmap changes are rolled back
+ * through the unregister path before returning.
+ */
+static int
+igb_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct netmap_adapter *na = NA(ifp);
+	int error = 0;
+
+	if (na == NULL)
+		return EINVAL;
+
+	igb_disable_intr(adapter);
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+
+		/* save if_transmit to restore it later */
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_start;
+
+		igb_init_locked(adapter);
+		/*
+		 * The flags were cleared above, so finding both still
+		 * clear is taken to mean that the reinit failed.
+		 */
+		if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
+			error = ENOMEM;
+			goto fail;
+		}
+	} else {
+		/*
+		 * The label sits inside the else branch on purpose: a
+		 * failed register shares the code of a plain unregister.
+		 */
+fail:
+		/* restore if_transmit */
+		ifp->if_transmit = na->if_transmit;
+		ifp->if_capenable &= ~IFCAP_NETMAP;
+		igb_init_locked(adapter);	/* also enables intr */
+	}
+	return (error);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
+ */
+static int
+igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
+
+	/* generate an interrupt approximately every half ring */
+	int report_frequency = kring->nkr_num_slots >> 1;
+
+	k = ring->cur;
+	if (k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		IGB_TX_LOCK(txr);
+	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+	    BUS_DMASYNC_POSTREAD);
+
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	j = kring->nr_hwcur; /* netmap ring index */
+	if (j != k) {	/* we have new packets to send */
+		u32 olinfo_status = 0;
+
+		l = j - kring->nkr_hwofs; /* NIC ring index */
+		if (l < 0)
+			l += lim + 1;
+		/* 82575 needs the queue index added */
+		if (adapter->hw.mac.type == e1000_82575)
+			olinfo_status |= txr->me << 4;
+
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct igb_tx_buffer *txbuf = &txr->tx_buffers[l];
+			union e1000_adv_tx_desc *curr =
+			    (union e1000_adv_tx_desc *)&txr->tx_base[l];
+			uint64_t paddr;
+			void *addr = PNMB(slot, &paddr);
+			int flags = ((slot->flags & NS_REPORT) ||
+				j == 0 || j == report_frequency) ?
+					E1000_ADVTXD_DCMD_RS : 0;
+			int len = slot->len;
+
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				if (do_lock)
+					IGB_TX_UNLOCK(txr);
+				return netmap_ring_reinit(kring);
+			}
+
+			slot->flags &= ~NS_REPORT;
+			// XXX do we need to set the address ?
+			curr->read.buffer_addr = htole64(paddr);
+			curr->read.olinfo_status =
+			    htole32(olinfo_status |
+				(len<< E1000_ADVTXD_PAYLEN_SHIFT));
+			curr->read.cmd_type_len =
+			    htole32(len | E1000_ADVTXD_DTYP_DATA |
+				    E1000_ADVTXD_DCMD_IFCS |
+				    E1000_ADVTXD_DCMD_DEXT |
+				    E1000_ADVTXD_DCMD_EOP | flags);
+			if (slot->flags & NS_BUF_CHANGED) {
+				/* buffer has changed, reload map */
+				netmap_reload_map(txr->txtag, txbuf->map, addr);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(txr->txtag, txbuf->map,
+				BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			l = (l == lim) ? 0 : l + 1;
+			n++;
+		}
+		kring->nr_hwcur = k;
+
+		/* decrease avail by number of sent packets */
+		kring->nr_hwavail -= n;
+		ring->avail = kring->nr_hwavail;
+
+		/* Set the watchdog XXX ? */
+		txr->queue_status = IGB_QUEUE_WORKING;
+		txr->watchdog_time = ticks;
+
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+	}
+	if (n == 0 || kring->nr_hwavail < 1) {
+		int delta;
+
+		/* record completed transmission using TDH */
+		l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+		if (l >= kring->nkr_num_slots) /* XXX can it happen ? */
+			l -= kring->nkr_num_slots;
+		delta = l - txr->next_to_clean;
+		if (delta) {
+			/* new tx were completed */
+			if (delta < 0)
+				delta += kring->nkr_num_slots;
+			txr->next_to_clean = l;
+			kring->nr_hwavail += delta;
+			ring->avail = kring->nr_hwavail;
+		}
+	}

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201202142249.q1EMnZ6S022850>