Skip site navigation (1) Skip section navigation (2)
Date:      Tue, 30 Oct 2018 08:36:37 +0000 (UTC)
From:      Vincenzo Maffione <vmaffione@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r339906 - in stable/12/sys: conf dev/netmap net
Message-ID:  <201810300836.w9U8abSf000249@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: vmaffione
Date: Tue Oct 30 08:36:36 2018
New Revision: 339906
URL: https://svnweb.freebsd.org/changeset/base/339906

Log:
  MFC r339639:
  
  netmap: align codebase to the current upstream (sha 8374e1a7e6941)
  
  Changelist:
      - Move large parts of VALE code to a new file and header netmap_bdg.[ch].
        This is useful to reuse the code within upcoming projects.
      - Improvements and bug fixes to pipes and monitors.
      - Introduce nm_os_onattach(), nm_os_onenter() and nm_os_onexit() to
        handle differences between FreeBSD and Linux.
      - Introduce some new helper functions to handle more host rings and fake
        rings (netmap_all_rings(), netmap_real_rings(), ...)
      - Added new sysctl to enable/disable hw checksum in emulated netmap mode.
      - nm_inject: add support for NS_MOREFRAG
  
  Approved by: re (gjb)

Added:
  stable/12/sys/dev/netmap/netmap_bdg.c
     - copied unchanged from r339639, head/sys/dev/netmap/netmap_bdg.c
  stable/12/sys/dev/netmap/netmap_bdg.h
     - copied unchanged from r339639, head/sys/dev/netmap/netmap_bdg.h
Modified:
  stable/12/sys/conf/files
  stable/12/sys/dev/netmap/netmap.c
  stable/12/sys/dev/netmap/netmap_freebsd.c
  stable/12/sys/dev/netmap/netmap_generic.c
  stable/12/sys/dev/netmap/netmap_kern.h
  stable/12/sys/dev/netmap/netmap_mem2.c
  stable/12/sys/dev/netmap/netmap_monitor.c
  stable/12/sys/dev/netmap/netmap_pipe.c
  stable/12/sys/dev/netmap/netmap_vale.c
  stable/12/sys/net/netmap.h
  stable/12/sys/net/netmap_user.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/conf/files
==============================================================================
--- stable/12/sys/conf/files	Tue Oct 30 05:04:18 2018	(r339905)
+++ stable/12/sys/conf/files	Tue Oct 30 08:36:36 2018	(r339906)
@@ -2544,6 +2544,7 @@ dev/netmap/netmap_pipe.c	optional netmap
 dev/netmap/netmap_pt.c		optional netmap
 dev/netmap/netmap_vale.c	optional netmap
 dev/netmap/netmap_legacy.c	optional netmap
+dev/netmap/netmap_bdg.c		optional netmap
 # compile-with "${NORMAL_C} -Wconversion -Wextra"
 dev/nfsmb/nfsmb.c		optional nfsmb pci
 dev/nge/if_nge.c		optional nge

Modified: stable/12/sys/dev/netmap/netmap.c
==============================================================================
--- stable/12/sys/dev/netmap/netmap.c	Tue Oct 30 05:04:18 2018	(r339905)
+++ stable/12/sys/dev/netmap/netmap.c	Tue Oct 30 08:36:36 2018	(r339906)
@@ -521,6 +521,9 @@ int netmap_generic_txqdisc = 1;
 int netmap_generic_ringsize = 1024;
 int netmap_generic_rings = 1;
 
+/* Non-zero to enable checksum offloading in NIC drivers */
+int netmap_generic_hwcsum = 0;
+
 /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
 int ptnet_vnet_hdr = 1;
 
@@ -549,6 +552,9 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &ne
 SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
 		"Adapter mode. 0 selects the best option available,"
 		"1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
+		0, "Hardware checksums. 0 to disable checksum generation by the NIC (default),"
+		"1 to enable checksum generation by the NIC");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
 		0, "RX notification interval in nanoseconds");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
@@ -827,8 +833,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int 
 	}
 
 	/* account for the (possibly fake) host rings */
-	n[NR_TX] = na->num_tx_rings + 1;
-	n[NR_RX] = na->num_rx_rings + 1;
+	n[NR_TX] = netmap_all_rings(na, NR_TX);
+	n[NR_RX] = netmap_all_rings(na, NR_RX);
 
 	len = (n[NR_TX] + n[NR_RX]) *
 		(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
@@ -930,11 +936,14 @@ netmap_krings_delete(struct netmap_adapter *na)
 void
 netmap_hw_krings_delete(struct netmap_adapter *na)
 {
-	struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
+	u_int lim = netmap_real_rings(na, NR_RX), i;
 
-	ND("destroy sw mbq with len %d", mbq_len(q));
-	mbq_purge(q);
-	mbq_safe_fini(q);
+	for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
+		struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
+		ND("destroy sw mbq with len %d", mbq_len(q));
+		mbq_purge(q);
+		mbq_safe_fini(q);
+	}
 	netmap_krings_delete(na);
 }
 
@@ -1535,7 +1544,7 @@ netmap_get_na(struct nmreq_header *hdr,
 		goto out;
 
 	/* try to see if this is a bridge port */
-	error = netmap_get_bdg_na(hdr, na, nmd, create);
+	error = netmap_get_vale_na(hdr, na, nmd, create);
 	if (error)
 		goto out;
 
@@ -1827,7 +1836,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint3
 			}
 			priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
 				nma_get_nrings(na, t) : 0);
-			priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+			priv->np_qlast[t] = netmap_all_rings(na, t);
 			ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
 				nm_txrx2str(t),
 				priv->np_qfirst[t], priv->np_qlast[t]);
@@ -2543,7 +2552,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, c
 			NMG_LOCK();
 			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
 			hdr->nr_body = (uintptr_t)&regreq;
-			error = netmap_get_bdg_na(hdr, &na, NULL, 0);
+			error = netmap_get_vale_na(hdr, &na, NULL, 0);
 			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
 			hdr->nr_body = (uintptr_t)req;
 			if (na && !error) {
@@ -3336,6 +3345,12 @@ netmap_attach_common(struct netmap_adapter *na)
 	}
 	na->pdev = na; /* make sure netmap_mem_map() is called */
 #endif /* __FreeBSD__ */
+	if (na->na_flags & NAF_HOST_RINGS) {
+		if (na->num_host_rx_rings == 0)
+			na->num_host_rx_rings = 1;
+		if (na->num_host_tx_rings == 0)
+			na->num_host_tx_rings = 1;
+	}
 	if (na->nm_krings_create == NULL) {
 		/* we assume that we have been called by a driver,
 		 * since other port types all provide their own
@@ -3357,7 +3372,7 @@ netmap_attach_common(struct netmap_adapter *na)
 		/* no special nm_bdg_attach callback. On VALE
 		 * attach, we need to interpose a bwrap
 		 */
-		na->nm_bdg_attach = netmap_bwrap_attach;
+		na->nm_bdg_attach = netmap_default_bdg_attach;
 #endif
 
 	return 0;
@@ -3399,10 +3414,10 @@ out:
 static void
 netmap_hw_dtor(struct netmap_adapter *na)
 {
-	if (nm_iszombie(na) || na->ifp == NULL)
+	if (na->ifp == NULL)
 		return;
 
-	WNA(na->ifp) = NULL;
+	NM_DETACH_NA(na->ifp);
 }
 
 
@@ -3426,10 +3441,10 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t s
 	}
 
 	if (arg == NULL || arg->ifp == NULL)
-		goto fail;
+		return EINVAL;
 
 	ifp = arg->ifp;
-	if (NA(ifp) && !NM_NA_VALID(ifp)) {
+	if (NM_NA_CLASH(ifp)) {
 		/* If NA(ifp) is not null but there is no valid netmap
 		 * adapter it means that someone else is using the same
 		 * pointer (e.g. ax25_ptr on linux). This happens for
@@ -3456,28 +3471,8 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t s
 
 	NM_ATTACH_NA(ifp, &hwna->up);
 
-#ifdef linux
-	if (ifp->netdev_ops) {
-		/* prepare a clone of the netdev ops */
-#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
-		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
-#else
-		hwna->nm_ndo = *ifp->netdev_ops;
-#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
-	}
-	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
-	hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
-	if (ifp->ethtool_ops) {
-		hwna->nm_eto = *ifp->ethtool_ops;
-	}
-	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
-#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
-	hwna->nm_eto.set_channels = linux_netmap_set_channels;
-#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
-	if (arg->nm_config == NULL) {
-		hwna->up.nm_config = netmap_linux_config;
-	}
-#endif /* linux */
+	nm_os_onattach(ifp);
+
 	if (arg->nm_dtor == NULL) {
 		hwna->up.nm_dtor = netmap_hw_dtor;
 	}
@@ -3545,7 +3540,10 @@ netmap_hw_krings_create(struct netmap_adapter *na)
 	int ret = netmap_krings_create(na, 0);
 	if (ret == 0) {
 		/* initialize the mbq for the sw rx ring */
-		mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
+		u_int lim = netmap_real_rings(na, NR_RX), i;
+		for (i = na->num_rx_rings; i < lim; i++) {
+			mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
+		}
 		ND("initialized sw rx queue %d", na->num_rx_rings);
 	}
 	return ret;
@@ -3608,8 +3606,14 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
 	unsigned int txr;
 	struct mbq *q;
 	int busy;
+	u_int i;
 
-	kring = na->rx_rings[na->num_rx_rings];
+	i = MBUF_TXQ(m);
+	if (i >= na->num_host_rx_rings) {
+		i = i % na->num_host_rx_rings;
+	}
+	kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];
+
 	// XXX [Linux] we do not need this lock
 	// if we follow the down/configure/up protocol -gl
 	// mtx_lock(&na->core_lock);
@@ -3639,8 +3643,15 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
 		goto done;
 	}
 
-	if (nm_os_mbuf_has_offld(m)) {
-		RD(1, "%s drop mbuf that needs offloadings", na->name);
+	if (!netmap_generic_hwcsum) {
+		if (nm_os_mbuf_has_csum_offld(m)) {
+			RD(1, "%s drop mbuf that needs checksum offload", na->name);
+			goto done;
+		}
+	}
+
+	if (nm_os_mbuf_has_seg_offld(m)) {
+		RD(1, "%s drop mbuf that needs generic segmentation offload", na->name);
 		goto done;
 	}
 
@@ -3843,6 +3854,40 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_
 	}
 
 	return netmap_common_irq(na, q, work_done);
+}
+
+/* set/clear native flags and if_transmit/netdev_ops */
+void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+
+	/* We do the setup for intercepting packets only if we are the
+	 * first user of this adapter. */
+	if (na->active_fds > 0) {
+		return;
+	}
+
+	na->na_flags |= NAF_NETMAP_ON;
+	nm_os_onenter(ifp);
+	nm_update_hostrings_mode(na);
+}
+
+void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+
+	/* We undo the setup for intercepting packets only if we are the
+	 * last user of this adapter. */
+	if (na->active_fds > 0) {
+		return;
+	}
+
+	nm_update_hostrings_mode(na);
+	nm_os_onexit(ifp);
+
+	na->na_flags &= ~NAF_NETMAP_ON;
 }
 
 

Copied: stable/12/sys/dev/netmap/netmap_bdg.c (from r339639, head/sys/dev/netmap/netmap_bdg.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/12/sys/dev/netmap/netmap_bdg.c	Tue Oct 30 08:36:36 2018	(r339906, copy of r339639, head/sys/dev/netmap/netmap_bdg.c)
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>	/* defines used in kernel.h */
+#include <sys/kernel.h>	/* types used in module initialization */
+#include <sys/conf.h>	/* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h>	/* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h>		/* BIOCIMMEDIATE */
+#include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+	struct nm_bridge *b = vp->na_bdg;
+	if (b == NULL)
+		return NULL;
+	return b->bdg_basename;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+static int
+nm_is_id_char(const char c)
+{
+	return (c >= 'a' && c <= 'z') ||
+	       (c >= 'A' && c <= 'Z') ||
+	       (c >= '0' && c <= '9') ||
+	       (c == '_');
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character. */
+static int
+nm_vale_name_validate(const char *name)
+{
+	int colon_pos = -1;
+	int i;
+
+	if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+		return -1;
+	}
+
+	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
+		if (name[i] == ':') {
+			colon_pos = i;
+			break;
+		} else if (!nm_is_id_char(name[i])) {
+			return -1;
+		}
+	}
+
+	if (strlen(name) - colon_pos > IFNAMSIZ) {
+		/* interface name too long */
+		return -1;
+	}
+
+	return colon_pos;
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name. Names without a ':', or
+ * shorter than NM_BDG_NAME, are rejected by nm_vale_name_validate().
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+	int i, namelen;
+	struct nm_bridge *b = NULL, *bridges;
+	u_int num_bridges;
+
+	NMG_LOCK_ASSERT();
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	namelen = nm_vale_name_validate(name);
+	if (namelen < 0) {
+		D("invalid bridge name %s", name ? name : NULL);
+		return NULL;
+	}
+
+	/* lookup the name, remember empty slot if there is one */
+	for (i = 0; i < num_bridges; i++) {
+		struct nm_bridge *x = bridges + i;
+
+		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+			if (create && b == NULL)
+				b = x;	/* record empty slot */
+		} else if (x->bdg_namelen != namelen) {
+			continue;
+		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+			ND("found '%.*s' at %d", namelen, name, i);
+			b = x;
+			break;
+		}
+	}
+	if (i == num_bridges && b) { /* name not found, can create entry */
+		/* initialize the bridge */
+		ND("create new bridge %s with ports %d", b->bdg_basename,
+			b->bdg_active_ports);
+		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+		if (b->ht == NULL) {
+			D("failed to allocate hash table");
+			return NULL;
+		}
+		strncpy(b->bdg_basename, name, namelen);
+		b->bdg_namelen = namelen;
+		b->bdg_active_ports = 0;
+		for (i = 0; i < NM_BDG_MAXPORTS; i++)
+			b->bdg_port_index[i] = i;
+		/* set the default function */
+		b->bdg_ops = ops;
+		b->private_data = b->ht;
+		b->bdg_flags = 0;
+		NM_BNS_GET(b);
+	}
+	return b;
+}
+
+
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
+		return EBUSY;
+	}
+
+	ND("marking bridge %s as free", b->bdg_basename);
+	nm_os_free(b->ht);
+	b->bdg_ops = NULL;
+	b->bdg_flags = 0;
+	NM_BNS_PUT(b);
+	return 0;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+	int s_hw = hw, s_sw = sw;
+	int i, lim =b->bdg_active_ports;
+	uint32_t *tmp = b->tmp_bdg_port_index;
+
+	/*
+	New algorithm:
+	make a copy of bdg_port_index;
+	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+	in the array of bdg_port_index, replacing them with
+	entries from the bottom of the array;
+	decrement bdg_active_ports;
+	acquire BDG_WLOCK() and copy back the array.
+	 */
+
+	if (netmap_verbose)
+		D("detach %d and %d (lim %d)", hw, sw, lim);
+	/* make a copy of the list of active ports, update it,
+	 * and then copy back within BDG_WLOCK().
+	 */
+	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
+	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+		if (hw >= 0 && tmp[i] == hw) {
+			ND("detach hw %d at %d", hw, i);
+			lim--; /* point to last active port */
+			tmp[i] = tmp[lim]; /* swap with i */
+			tmp[lim] = hw;	/* now this is inactive */
+			hw = -1;
+		} else if (sw >= 0 && tmp[i] == sw) {
+			ND("detach sw %d at %d", sw, i);
+			lim--;
+			tmp[i] = tmp[lim];
+			tmp[lim] = sw;
+			sw = -1;
+		} else {
+			i++;
+		}
+	}
+	if (hw >= 0 || sw >= 0) {
+		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+	}
+
+	BDG_WLOCK(b);
+	if (b->bdg_ops->dtor)
+		b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+	b->bdg_ports[s_hw] = NULL;
+	if (s_sw >= 0) {
+		b->bdg_ports[s_sw] = NULL;
+	}
+	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
+	b->bdg_active_ports = lim;
+	BDG_WUNLOCK(b);
+
+	ND("now %d active ports", lim);
+	netmap_bdg_free(b);
+}
+
+
+/* nm_bdg_ctl callback for VALE ports */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+	struct nm_bridge *b = vpna->na_bdg;
+
+	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+		return 0; /* nothing to do */
+	}
+	if (b) {
+		netmap_set_all_rings(na, 0 /* disable */);
+		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+		vpna->na_bdg = NULL;
+		netmap_set_all_rings(na, 1 /* enable */);
+	}
+	/* drop the reference that was taken just for the attach */
+	netmap_adapter_put(na);
+	return 0;
+}
+
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+		struct nm_bridge *b)
+{
+	return NM_NEED_BWRAP;
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+	char *nr_name = hdr->nr_name;
+	const char *ifname;
+	struct ifnet *ifp = NULL;
+	int error = 0;
+	struct netmap_vp_adapter *vpna, *hostna = NULL;
+	struct nm_bridge *b;
+	uint32_t i, j;
+	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+	int needed;
+
+	*na = NULL;     /* default return value */
+
+	/* first try to see if this is a bridge port. */
+	NMG_LOCK_ASSERT();
+	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+		return 0;  /* no error, but no VALE prefix */
+	}
+
+	b = nm_find_bridge(nr_name, create, ops);
+	if (b == NULL) {
+		ND("no bridges available for '%s'", nr_name);
+		return (create ? ENOMEM : ENXIO);
+	}
+	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+		panic("x");
+
+	/* Now we are sure that name starts with the bridge's name,
+	 * lookup the port in the bridge. We need to scan the entire
+	 * list. It is not important to hold a WLOCK on the bridge
+	 * during the search because NMG_LOCK already guarantees
+	 * that there are no other possible writers.
+	 */
+
+	/* lookup in the local list of ports */
+	for (j = 0; j < b->bdg_active_ports; j++) {
+		i = b->bdg_port_index[j];
+		vpna = b->bdg_ports[i];
+		ND("checking %s", vpna->up.name);
+		if (!strcmp(vpna->up.name, nr_name)) {
+			netmap_adapter_get(&vpna->up);
+			ND("found existing if %s refs %d", nr_name)
+			*na = &vpna->up;
+			return 0;
+		}
+	}
+	/* not found, should we create it? */
+	if (!create)
+		return ENXIO;
+	/* yes we should, see if we have space to attach entries */
+	needed = 2; /* in some cases we only need 1 */
+	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+		D("bridge full %d, cannot create new port", b->bdg_active_ports);
+		return ENOMEM;
+	}
+	/* record the next two ports available, but do not allocate yet */
+	cand = b->bdg_port_index[b->bdg_active_ports];
+	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+	ND("+++ bridge %s port %s used %d avail %d %d",
+		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+
+	/*
+	 * try to see if there is a matching NIC with this name
+	 * (after the bridge's name)
+	 */
+	ifname = nr_name + b->bdg_namelen + 1;
+	ifp = ifunit_ref(ifname);
+	if (!ifp) {
+		/* Create an ephemeral virtual port.
+		 * This block contains all the ephemeral-specific logic.
+		 */
+
+		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/* bdg_ops->vp_create creates the struct netmap_vp_adapter */
+		error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+		if (error) {
+			D("error %d", error);
+			goto out;
+		}
+		/* shortcut - we can skip get_hw_na(),
+		 * ownership check and nm_bdg_attach()
+		 */
+
+	} else {
+		struct netmap_adapter *hw;
+
+		/* the vale:nic syntax is only valid for some commands */
+		switch (hdr->nr_reqtype) {
+		case NETMAP_REQ_VALE_ATTACH:
+		case NETMAP_REQ_VALE_DETACH:
+		case NETMAP_REQ_VALE_POLLING_ENABLE:
+		case NETMAP_REQ_VALE_POLLING_DISABLE:
+			break; /* ok */
+		default:
+			error = EINVAL;
+			goto out;
+		}
+
+		error = netmap_get_hw_na(ifp, nmd, &hw);
+		if (error || hw == NULL)
+			goto out;
+
+		/* host adapter might not be created */
+		error = hw->nm_bdg_attach(nr_name, hw, b);
+		if (error == NM_NEED_BWRAP) {
+			error = b->bdg_ops->bwrap_attach(nr_name, hw);
+		}
+		if (error)
+			goto out;
+		vpna = hw->na_vp;
+		hostna = hw->na_hostvp;
+		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+			/* Check if we need to skip the host rings. */
+			struct nmreq_vale_attach *areq =
+				(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+				hostna = NULL;
+			}
+		}
+	}
+
+	BDG_WLOCK(b);
+	vpna->bdg_port = cand;
+	ND("NIC  %p to bridge port %d", vpna, cand);
+	/* bind the port to the bridge (virtual ports are not active) */
+	b->bdg_ports[cand] = vpna;
+	vpna->na_bdg = b;
+	b->bdg_active_ports++;
+	if (hostna != NULL) {
+		/* also bind the host stack to the bridge */
+		b->bdg_ports[cand2] = hostna;
+		hostna->bdg_port = cand2;
+		hostna->na_bdg = b;
+		b->bdg_active_ports++;
+		ND("host %p to bridge port %d", hostna, cand2);
+	}
+	ND("if %s refs %d", ifname, vpna->up.na_refcount);
+	BDG_WUNLOCK(b);
+	*na = &vpna->up;
+	netmap_adapter_get(*na);
+
+out:
+	if (ifp)
+		if_rele(ifp);
+
+	return error;
+}
+
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_attach *req =
+		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter * vpna;
+	struct netmap_adapter *na = NULL;
+	struct netmap_mem_d *nmd = NULL;
+	struct nm_bridge *b = NULL;
+	int error;
+
+	NMG_LOCK();
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	if (req->reg.nr_mem_id) {
+		nmd = netmap_mem_find(req->reg.nr_mem_id);
+		if (nmd == NULL) {
+			error = EINVAL;
+			goto unlock_exit;
+		}
+	}
+
+	/* check for existing one */
+	error = netmap_get_vale_na(hdr, &na, nmd, 0);
+	if (na) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+	error = netmap_get_vale_na(hdr, &na,
+				nmd, 1 /* create if not exists */);
+	if (error) { /* no device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (NETMAP_OWNED_BY_ANY(na)) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	if (na->nm_bdg_ctl) {
+		/* nop for VALE ports. The bwrap needs to put the hwna
+		 * in netmap mode (see netmap_bwrap_bdg_ctl)
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+		if (error)
+			goto unref_exit;
+		ND("registered %s to netmap-mode", na->name);
+	}
+	vpna = (struct netmap_vp_adapter *)na;
+	req->port_index = vpna->bdg_port;
+	NMG_UNLOCK();
+	return 0;
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+}
+
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+	return na->nm_register == netmap_bwrap_reg;
+}
+
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na;
+	struct nm_bridge *b = NULL;
+	int error;
+
+	NMG_LOCK();
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+	if (error) { /* no device, or another bridge or user owns the device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	} else if (nm_is_bwrap(na) &&
+		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+		/* Don't detach a NIC with polling */
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	vpna = (struct netmap_vp_adapter *)na;
+	if (na->na_vp != vpna) {
+		/* trying to detach the first attachment of a persistent VALE
+		 * port that is attached to 2 bridges
+		 */
+		error = EBUSY;
+		goto unref_exit;
+	}
+	nmreq_det->port_index = vpna->bdg_port;
+
+	if (na->nm_bdg_ctl) {
+		/* remove the port from bridge. The bwrap
+		 * also needs to put the hwna in normal mode
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+	}
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+
+}
+
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+	struct nm_kctx *nmk;
+	u_int qfirst;
+	u_int qlast;
+	struct nm_bdg_polling_state *bps;
+};
+
+struct nm_bdg_polling_state {
+	bool configured;
+	bool stopped;
+	struct netmap_bwrap_adapter *bna;
+	uint32_t mode;
+	u_int qfirst;
+	u_int qlast;
+	u_int cpu_from;
+	u_int ncpus;
+	struct nm_bdg_kthread *kthreads;
+};
+
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+	struct nm_bdg_kthread *nbk = data;
+	struct netmap_bwrap_adapter *bna;
+	u_int qfirst, qlast, i;
+	struct netmap_kring **kring0, *kring;
+
+	if (!nbk)
+		return;
+	qfirst = nbk->qfirst;
+	qlast = nbk->qlast;
+	bna = nbk->bps->bna;
+	kring0 = NMR(bna->hwna, NR_RX);
+
+	for (i = qfirst; i < qlast; i++) {
+		kring = kring0[i];
+		kring->nm_notify(kring, 0);
+	}
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+	struct nm_kctx_cfg kcfg;
+	int i, j;
+
+	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
+	if (bps->kthreads == NULL)
+		return ENOMEM;
+
+	bzero(&kcfg, sizeof(kcfg));
+	kcfg.worker_fn = netmap_bwrap_polling;
+	kcfg.use_kthread = 1;
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		int all = (bps->ncpus == 1 &&
+			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+		int affinity = bps->cpu_from + i;
+
+		t->bps = bps;
+		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+		t->qlast = all ? bps->qlast : t->qfirst + 1;
+		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+			t->qlast);
+
+		kcfg.type = i;
+		kcfg.worker_private = t;
+		t->nmk = nm_os_kctx_create(&kcfg, NULL);
+		if (t->nmk == NULL) {
+			goto cleanup;
+		}
+		nm_os_kctx_worker_setaff(t->nmk, affinity);
+	}
+	return 0;
+
+cleanup:
+	for (j = 0; j < i; j++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		nm_os_kctx_destroy(t->nmk);
+	}
+	nm_os_free(bps->kthreads);
+	return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+	int error, i, j;
+
+	if (!bps) {
+		D("polling is not configured");
+		return EFAULT;
+	}
+	bps->stopped = false;
+
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		error = nm_os_kctx_worker_start(t->nmk);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201810300836.w9U8abSf000249>