Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 23 Oct 2012 14:19:45 +0000 (UTC)
From:      Andre Oppermann <andre@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r241931 - in head/sys: conf kern
Message-ID:  <201210231419.q9NEJjYH082863@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: andre
Date: Tue Oct 23 14:19:44 2012
New Revision: 241931
URL: http://svn.freebsd.org/changeset/base/241931

Log:
  Replace the ill-named ZERO_COPY_SOCKETS kernel option with two
  more appropriately named kernel options for the very distinct
  send and receive paths.
  
  "options SOCKET_SEND_COW" enables VM page copy-on-write based
  sending of data on an outbound socket.
  
  NB: The COW based send mechanism is not safe and may result
  in kernel crashes.
  
  "options SOCKET_RECV_PFLIP" enables VM kernel/userspace page
  flipping for special disposable pages attached as external
  storage to mbufs.
  
  Only the naming of the kernel options is changed and their
  corresponding #ifdef sections are adjusted.  No functionality
  is added or removed.
  
  Discussed with:	alc (mechanism and limitations of send side COW)

Modified:
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/kern/subr_uio.c
  head/sys/kern/uipc_socket.c

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Tue Oct 23 12:39:17 2012	(r241930)
+++ head/sys/conf/NOTES	Tue Oct 23 14:19:44 2012	(r241931)
@@ -964,12 +964,20 @@ options 	TCP_SIGNATURE		#include support
 # a smooth scheduling of the traffic.
 options 	DUMMYNET
 
-# Zero copy sockets support.  This enables "zero copy" for sending and
-# receiving data via a socket.  The send side works for any type of NIC,
-# the receive side only works for NICs that support MTUs greater than the
-# page size of your architecture and that support header splitting.  See
-# zero_copy(9) for more details.
-options 	ZERO_COPY_SOCKETS
+# "Zero copy" sockets support is split into the send and receive path
+# which operate very differently.
+# For the send path the VM page with the data is wired into the kernel
+# and marked as COW (copy-on-write).  If the application touches the
+# data while it is still in the send socket buffer the page is copied
+# and divorced from its kernel wiring (no longer zero copy).
+# The receive side requires explicit NIC driver support to create
+# disposable pages which are flipped from kernel to user-space VM.
+# See zero_copy(9) for more details.
+# XXX: The COW based send mechanism is not safe and may result in
+# kernel crashes.
+# XXX: None of the current NIC drivers support disposable pages.
+options		SOCKET_SEND_COW
+options		SOCKET_RECV_PFLIP
 
 #####################################################################
 # FILESYSTEM OPTIONS

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Tue Oct 23 12:39:17 2012	(r241930)
+++ head/sys/conf/options	Tue Oct 23 14:19:44 2012	(r241931)
@@ -520,7 +520,8 @@ NGATM_CCATM		opt_netgraph.h
 # DRM options
 DRM_DEBUG		opt_drm.h
 
-ZERO_COPY_SOCKETS	opt_zero.h
+SOCKET_SEND_COW		opt_zero.h
+SOCKET_RECV_PFLIP	opt_zero.h
 TI_SF_BUF_JUMBO		opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 BCE_JUMBO_HDRSPLIT	opt_bce.h

Modified: head/sys/kern/subr_uio.c
==============================================================================
--- head/sys/kern/subr_uio.c	Tue Oct 23 12:39:17 2012	(r241930)
+++ head/sys/kern/subr_uio.c	Tue Oct 23 14:19:44 2012	(r241931)
@@ -57,7 +57,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_extern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_SEND_COW
 #include <vm/vm_object.h>
 #endif
 
@@ -66,7 +66,7 @@ SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max,
 
 static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
 
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_SEND_COW
 /* Declared in uipc_socket.c */
 extern int so_zero_copy_receive;
 
@@ -128,7 +128,7 @@ retry:
 	vm_map_lookup_done(map, entry);
 	return(KERN_SUCCESS);
 }
-#endif /* ZERO_COPY_SOCKETS */
+#endif /* SOCKET_SEND_COW */
 
 int
 copyin_nofault(const void *udaddr, void *kaddr, size_t len)
@@ -261,7 +261,7 @@ uiomove_frombuf(void *buf, int buflen, s
 	return (uiomove((char *)buf + offset, n, uio));
 }
 
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_RECV_PFLIP
 /*
  * Experimental support for zero-copy I/O
  */
@@ -356,7 +356,7 @@ uiomoveco(void *cp, int n, struct uio *u
 	}
 	return (0);
 }
-#endif /* ZERO_COPY_SOCKETS */
+#endif /* SOCKET_RECV_PFLIP */
 
 /*
  * Give next character to user as result of read.

Modified: head/sys/kern/uipc_socket.c
==============================================================================
--- head/sys/kern/uipc_socket.c	Tue Oct 23 12:39:17 2012	(r241930)
+++ head/sys/kern/uipc_socket.c	Tue Oct 23 14:19:44 2012	(r241931)
@@ -219,17 +219,20 @@ static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 
-#ifdef ZERO_COPY_SOCKETS
-/* These aren't static because they're used in other files. */
-int so_zero_copy_send = 1;
-int so_zero_copy_receive = 1;
+#if defined(SOCKET_SEND_COW) || defined(SOCKET_RECV_PFLIP)
 SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
     "Zero copy controls");
+#ifdef SOCKET_RECV_PFLIP
+int so_zero_copy_receive = 1;
 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
     &so_zero_copy_receive, 0, "Enable zero copy receive");
+#endif
+#ifdef SOCKET_SEND_COW
+int so_zero_copy_send = 1;
 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
     &so_zero_copy_send, 0, "Enable zero copy send");
-#endif /* ZERO_COPY_SOCKETS */
+#endif /* SOCKET_SEND_COW */
+#endif /* SOCKET_SEND_COW || SOCKET_RECV_PFLIP */
 
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
@@ -903,7 +906,7 @@ sodisconnect(struct socket *so)
 	return (error);
 }
 
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_SEND_COW
 struct so_zerocopy_stats{
 	int size_ok;
 	int align_ok;
@@ -1008,7 +1011,7 @@ out:
 	*retmp = top;
 	return (error);
 }
-#endif /* ZERO_COPY_SOCKETS */
+#endif /* SOCKET_SEND_COW */
 
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
@@ -1019,7 +1022,7 @@ sosend_dgram(struct socket *so, struct s
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_SEND_COW
 	int atomic = sosendallatonce(so) || top;
 #endif
 
@@ -1104,7 +1107,7 @@ sosend_dgram(struct socket *so, struct s
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_SEND_COW
 		error = sosend_copyin(uio, &top, atomic, &space, flags);
 		if (error)
 			goto out;
@@ -1121,7 +1124,7 @@ sosend_dgram(struct socket *so, struct s
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
-#endif
+#endif /* SOCKET_SEND_COW */
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
@@ -1293,7 +1296,7 @@ restart:
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else {
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_SEND_COW
 				error = sosend_copyin(uio, &top, atomic,
 				    &space, flags);
 				if (error != 0)
@@ -1313,7 +1316,7 @@ restart:
 					goto release;
 				}
 				space -= resid - uio->uio_resid;
-#endif
+#endif /* SOCKET_SEND_COW */
 				resid = uio->uio_resid;
 			}
 			if (dontroute) {
@@ -1405,7 +1408,7 @@ soreceive_rcvoob(struct socket *so, stru
 	if (error)
 		goto bad;
 	do {
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_RECV_PFLIP
 		if (so_zero_copy_receive) {
 			int disposable;
 
@@ -1419,7 +1422,7 @@ soreceive_rcvoob(struct socket *so, stru
 					  min(uio->uio_resid, m->m_len),
 					  uio, disposable);
 		} else
-#endif /* ZERO_COPY_SOCKETS */
+#endif /* SOCKET_RECV_PFLIP */
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
@@ -1743,7 +1746,7 @@ dontblock:
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
-#ifdef ZERO_COPY_SOCKETS
+#ifdef SOCKET_RECV_PFLIP
 			if (so_zero_copy_receive) {
 				int disposable;
 
@@ -1757,7 +1760,7 @@ dontblock:
 						  (int)len, uio,
 						  disposable);
 			} else
-#endif /* ZERO_COPY_SOCKETS */
+#endif /* SOCKET_RECV_PFLIP */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201210231419.q9NEJjYH082863>