Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 23 Jul 2006 19:57:56 +0100 (BST)
From:      Robert Watson <rwatson@FreeBSD.org>
To:        arch@FreeBSD.org
Subject:   sosend/soreceive consistency improvements
Message-ID:  <20060723171734.K35186@fledge.watson.org>

next in thread | raw e-mail | index | archive | help

As part of cleanups, locking, and optimization work, I've been looking at the 
socket send and receive paths.

In the past, work was done do allow the uio/mbuf chain send and receive paths 
(sosend, soreceive) to be pluggable for a protocol, so that the protocol could 
provide substitute implementations.  This is not, generally, currently used, 
although I recently changed UDP to use an optimized datagram send routine. 
This pluggability is made possible by virtue of each protocol providing its 
own pru_sosend() and pru_soreceive() methods in the protocol switch.

There's another side to the pluggability, however -- the socket consumers in 
the kernel, of which there are quite a few -- obviously the socket system 
calls, but also netgraph, distributed file systems, etc.  Some of these 
consumers have been modified to call so->so_proto->pr_usrreqs->pru_soreceive 
and ...->pru_sosend, but it turns out many haven't.  New references to 
sosend() and soreceive() periodically get encoded into consumers -- presumably 
because they are easy to spell, and in fact are generally functionally 
identical.  But not always!  It turns out that the NFS code isn't using the 
optimized UDP send path via sosend_dgram(), because it's calling sosend() 
directly.

Rather than continue in this "in between state", in which the uio/mbuf chain 
sosend and soreceive are reached via the protocol switch in each occurrence, I 
propose a change: sosend() and soreceive() will now be the formal APIs for 
sending and receiveing on sockets within the kernel, as is the case with many 
other so*() functions, and they will perform the protocol switch dereference. 
The existing functions are renamed to sosend_generic() and 
soreceive_generic(), and in most cases are never referenced by protocols since 
our protocol domain registration already uses sosend() and soreceive() as the 
defaults today.  The new code strikes me as quite a bit more readable, and 
likely easier for socket consumers to use.

Any thoughts and/or objections?

Robert N M Watson
Computer Laboratory
University of Cambridge

--- //depot/vendor/freebsd/src/sys/kern/sys_socket.c	2005/04/16 18:50:30
+++ //depot/user/rwatson/socleanup/src/sys/kern/sys_socket.c	2006/07/23 15:32:54
@@ -88,7 +88,7 @@
  		return (error);
  	}
  #endif
-	error = so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0);
+	error = soreceive(so, 0, uio, 0, 0, 0);
  	NET_UNLOCK_GIANT();
  	return (error);
  }
@@ -115,8 +115,7 @@
  		return (error);
  	}
  #endif
-	error = so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0,
-						    uio->uio_td);
+	error = sosend(so, 0, uio, 0, 0, 0, uio->uio_td);
  	if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
  		PROC_LOCK(uio->uio_td->td_proc);
  		psignal(uio->uio_td->td_proc, SIGPIPE);
--- //depot/vendor/freebsd/src/sys/kern/uipc_domain.c	2006/07/11 23:21:53
+++ //depot/user/rwatson/socleanup/src/sys/kern/uipc_domain.c	2006/07/23 15:52:33
@@ -119,8 +119,8 @@
  	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
  	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
  	DEFAULT(pu->pru_sense, pru_sense_null);
-	DEFAULT(pu->pru_sosend, sosend);
-	DEFAULT(pu->pru_soreceive, soreceive);
+	DEFAULT(pu->pru_sosend, sosend_generic);
+	DEFAULT(pu->pru_soreceive, soreceive_generic);
  	DEFAULT(pu->pru_sopoll, sopoll);
  #undef DEFAULT
  	if (pr->pr_init)
--- //depot/vendor/freebsd/src/sys/kern/uipc_socket.c	2006/07/21 17:16:23
+++ //depot/user/rwatson/socleanup/src/sys/kern/uipc_socket.c	2006/07/23 15:32:54
@@ -1087,7 +1087,7 @@
   */
  #define	snderr(errno)	{ error = (errno); goto release; }
  int
-sosend(so, addr, uio, top, control, flags, td)
+sosend_generic(so, addr, uio, top, control, flags, td)
  	struct socket *so;
  	struct sockaddr *addr;
  	struct uio *uio;
@@ -1249,6 +1249,25 @@
  }
  #undef snderr

+int
+sosend(so, addr, uio, top, control, flags, td)
+	struct socket *so;
+	struct sockaddr *addr;
+	struct uio *uio;
+	struct mbuf *top;
+	struct mbuf *control;
+	int flags;
+	struct thread *td;
+{
+
+	/* XXXRW: Temporary debugging. */
+	KASSERT(so->so_proto->pr_usrreqs->pru_sosend != sosend,
+	    ("sosend: protocol calls sosend"));
+
+	return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
+	    control, flags, td));
+}
+
  /*
   * The part of soreceive() that implements reading non-inline out-of-band
   * data from a socket.  For more complete comments, see soreceive(), from
@@ -1354,7 +1373,7 @@
   * only for the count in uio_resid.
   */
  int
-soreceive(so, psa, uio, mp0, controlp, flagsp)
+soreceive_generic(so, psa, uio, mp0, controlp, flagsp)
  	struct socket *so;
  	struct sockaddr **psa;
  	struct uio *uio;
@@ -1794,6 +1813,24 @@
  }

  int
+soreceive(so, psa, uio, mp0, controlp, flagsp)
+	struct socket *so;
+	struct sockaddr **psa;
+	struct uio *uio;
+	struct mbuf **mp0;
+	struct mbuf **controlp;
+	int *flagsp;
+{
+
+	/* XXXRW: Temporary debugging. */
+	KASSERT(so->so_proto->pr_usrreqs->pru_soreceive != soreceive,
+	    ("soreceive: protocol calls soreceive"));
+
+	return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
+	    controlp, flagsp));
+}
+
+int
  soshutdown(so, how)
  	struct socket *so;
  	int how;
--- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c	2006/07/19 18:31:24
+++ //depot/user/rwatson/socleanup/src/sys/kern/uipc_syscalls.c	2006/07/23 15:32:54
@@ -803,8 +803,7 @@
  		ktruio = cloneuio(&auio);
  #endif
  	len = auio.uio_resid;
-	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
-	    0, control, flags, td);
+	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  	if (error) {
  		if (auio.uio_resid != len && (error == ERESTART ||
  		    error == EINTR || error == EWOULDBLOCK))
@@ -1020,8 +1019,7 @@
  		ktruio = cloneuio(&auio);
  #endif
  	len = auio.uio_resid;
-	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
-	    (struct mbuf **)0,
+	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
  	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  	    &mp->msg_flags);
  	if (error) {
--- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c	2006/07/23 12:02:30
+++ //depot/user/rwatson/socleanup/src/sys/kern/uipc_usrreq.c	2006/07/23 16:05:31
@@ -768,8 +768,8 @@
  	.pru_sense =		uipc_sense,
  	.pru_shutdown =		uipc_shutdown,
  	.pru_sockaddr =		uipc_sockaddr,
-	.pru_sosend =		sosend,
-	.pru_soreceive =	soreceive,
+	.pru_sosend =		sosend_generic,
+	.pru_soreceive =	soreceive_generic,
  	.pru_sopoll =		sopoll,
  	.pru_close =		uipc_close,
  };
--- //depot/vendor/freebsd/src/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	2006/07/21 17:11:33
+++ //depot/user/rwatson/socleanup/src/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	2006/07/23 15:32:54
@@ -1558,8 +1558,8 @@
  		flags = MSG_DONTWAIT;

  		m = NULL;
-		error = (*s->l2so->so_proto->pr_usrreqs->pru_soreceive)(s->l2so,
-				NULL, &uio, &m, (struct mbuf **) NULL, &flags);
+		error = soreceive(s->l2so, NULL, &uio, &m,
+		    (struct mbuf **) NULL, &flags);
  		if (error != 0) {
  			if (error == EWOULDBLOCK)
  				return (0); /* XXX can happen? */
@@ -1610,9 +1610,8 @@
  			return (0); /* we are done */

  		/* Call send function on the L2CAP socket */
-		error = (*s->l2so->so_proto->pr_usrreqs->pru_sosend)
-				(s->l2so, NULL, NULL, m, NULL, 0,
-				 curthread /* XXX */);
+		error = sosend(s->l2so, NULL, NULL, m, NULL, 0,
+		    curthread /* XXX */);
  		if (error != 0) {
  			NG_BTSOCKET_RFCOMM_ERR(
  "%s: Could not send data to L2CAP socket, error=%d\n", __func__, error);
--- //depot/vendor/freebsd/src/sys/netgraph/ng_ksocket.c	2006/02/21 13:08:33
+++ //depot/user/rwatson/socleanup/src/sys/netgraph/ng_ksocket.c	2006/07/23 15:32:54
@@ -920,7 +920,7 @@
  		sa = &stag->sa;

  	/* Send packet */
-	error = (*so->so_proto->pr_usrreqs->pru_sosend)(so, sa, 0, m, 0, 0, td);
+	error = sosend(so, sa, 0, m, 0, 0, td);

  	return (error);
  }
@@ -1101,9 +1101,8 @@
  		struct mbuf *n;

  		/* Try to get next packet from socket */
-		if ((error = (*so->so_proto->pr_usrreqs->pru_soreceive)
-		    (so, (so->so_state & SS_ISCONNECTED) ? NULL : &sa,
-		    &auio, &m, (struct mbuf **)0, &flags)) != 0)
+		if ((error = soreceive(so, (so->so_state & SS_ISCONNECTED) ?
+		    NULL : &sa, &auio, &m, (struct mbuf **)0, &flags)) != 0)
  			break;

  		/* See if we got anything */
--- //depot/vendor/freebsd/src/sys/netncp/ncp_sock.c	2005/01/07 01:52:23
+++ //depot/user/rwatson/socleanup/src/sys/netncp/ncp_sock.c	2006/07/23 15:32:54
@@ -139,10 +139,9 @@
  	auio.uio_td = td;
  	flags = MSG_DONTWAIT;

-/*	error = so->so_proto->pr_usrreqs->pru_soreceive(so, 0, &auio,
-	    (struct mbuf **)0, (struct mbuf **)0, &flags);*/
-	error = so->so_proto->pr_usrreqs->pru_soreceive(so, 0, &auio,
-	    mp, (struct mbuf **)0, &flags);
+/*	error = soreceive(so, 0, &auio, (struct mbuf **)0, (struct mbuf **)0,
+	    &flags);*/
+	error = soreceive(so, 0, &auio, mp, (struct mbuf **)0, &flags);
  	*rlen = len - auio.uio_resid;
  /*	if (!error) {
  	    *rlen=iov.iov_len;
@@ -168,7 +167,7 @@
  	for (;;) {
  		m = m_copym(top, 0, M_COPYALL, M_TRYWAIT);
  /*		NCPDDEBUG(m);*/
-		error = so->so_proto->pr_usrreqs->pru_sosend(so, to, 0, m, 0, flags, td);
+		error = sosend(so, to, 0, m, 0, flags, td);
  		if (error == 0 || error == EINTR || error == ENETDOWN)
  			break;
  		if (rqp->rexmit == 0) break;
@@ -443,8 +442,8 @@
  		auio.uio_resid = len = 1000000;
  		auio.uio_td = curthread;
  		flags = MSG_DONTWAIT;
-		error = so->so_proto->pr_usrreqs->pru_soreceive(so,
-		    (struct sockaddr**)&sa, &auio, &m, (struct mbuf**)0, &flags);
+		error = soreceive(so, (struct sockaddr**)&sa, &auio, &m,
+		    (struct mbuf**)0, &flags);
  		if (error) break;
  		len -= auio.uio_resid;
  		NCPSDEBUG("got watch dog %d\n",len);
@@ -452,7 +451,7 @@
  		buf = mtod(m, char*);
  		if (buf[1] != '?') break;
  		buf[1] = 'Y';
-		error = so->so_proto->pr_usrreqs->pru_sosend(so, (struct sockaddr*)sa, 0, m, 0, 0, curthread);
+		error = sosend(so, (struct sockaddr*)sa, 0, m, 0, 0, curthread);
  		NCPSDEBUG("send watch dog %d\n",error);
  		break;
  	}
--- //depot/vendor/freebsd/src/sys/netsmb/smb_trantcp.c	2005/01/07 01:52:23
+++ //depot/user/rwatson/socleanup/src/sys/netsmb/smb_trantcp.c	2006/07/23 15:32:54
@@ -75,8 +75,7 @@
  SYSCTL_INT(_net_smb, OID_AUTO, tcpsndbuf, CTLFLAG_RW, &smb_tcpsndbuf, 0, "");
  SYSCTL_INT(_net_smb, OID_AUTO, tcprcvbuf, CTLFLAG_RW, &smb_tcprcvbuf, 0, "");

-#define nb_sosend(so,m,flags,td) (so)->so_proto->pr_usrreqs->pru_sosend( \
-				    so, NULL, 0, m, 0, flags, td)
+#define nb_sosend(so,m,flags,td) sosend(so, NULL, 0, m, 0, flags, td)

  static int  nbssn_recv(struct nbpcb *nbp, struct mbuf **mpp, int *lenp,
  	u_int8_t *rpcodep, struct thread *td);
@@ -377,8 +376,7 @@
  	auio.uio_offset = 0;
  	auio.uio_resid = sizeof(len);
  	auio.uio_td = td;
-	error = so->so_proto->pr_usrreqs->pru_soreceive
-	    (so, (struct sockaddr **)NULL, &auio,
+	error = soreceive(so, (struct sockaddr **)NULL, &auio,
  	    (struct mbuf **)NULL, (struct mbuf **)NULL, &flags);
  	if (error)
  		return error;
@@ -461,8 +459,7 @@
  			 */
  			do {
  				rcvflg = MSG_WAITALL;
-				error = so->so_proto->pr_usrreqs->pru_soreceive
-				    (so, (struct sockaddr **)NULL,
+				error = soreceive(so, (struct sockaddr **)NULL,
  				    &auio, &tm, (struct mbuf **)NULL, &rcvflg);
  			} while (error == EWOULDBLOCK || error == EINTR ||
  				 error == ERESTART);
--- //depot/vendor/freebsd/src/sys/nfsclient/nfs_socket.c	2006/07/08 15:41:11
+++ //depot/user/rwatson/socleanup/src/sys/nfsclient/nfs_socket.c	2006/07/23 15:32:54
@@ -606,8 +606,7 @@
  	else
  		flags = 0;

-	error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0,
-						     flags, curthread /*XXX*/);
+	error = sosend(so, sendnam, 0, top, 0, flags, curthread /*XXX*/);
  	if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
  		error = 0;
  		mtx_lock(&rep->r_mtx);
@@ -946,9 +945,8 @@
  			auio.uio_iovcnt = 0;
  			mp = NULL;
  			rcvflg = (MSG_DONTWAIT | MSG_SOCALLBCK);
-			error =  so->so_proto->pr_usrreqs->pru_soreceive
-				(so, (struct sockaddr **)0,
-				 &auio, &mp, (struct mbuf **)0, &rcvflg);
+			error =  soreceive(so, (struct sockaddr **)0, &auio,
+			    &mp, (struct mbuf **)0, &rcvflg);
  			/*
  			 * We've already tested that the socket is readable. 2 cases
  			 * here, we either read 0 bytes (client closed connection), 
@@ -1016,9 +1014,8 @@
  			auio.uio_iovcnt = 0;
  			mp = NULL;
  			rcvflg = (MSG_DONTWAIT | MSG_SOCALLBCK);
-			error =  so->so_proto->pr_usrreqs->pru_soreceive
-				(so, (struct sockaddr **)0,
-				 &auio, &mp, (struct mbuf **)0, &rcvflg);
+			error =  soreceive(so, (struct sockaddr **)0, &auio,
+			    &mp, (struct mbuf **)0, &rcvflg);
  			if (error || auio.uio_resid > 0) {
  				if (error && error != ECONNRESET) {
  					log(LOG_ERR, 
@@ -1058,9 +1055,7 @@
  	auio.uio_resid = 1000000000;
  	do {
  		mp = control = NULL;
-		error = so->so_proto->pr_usrreqs->pru_soreceive(so,
-					NULL, &auio, &mp,
-					&control, &rcvflag);
+		error = soreceive(so, NULL, &auio, &mp, &control, &rcvflag);
  		if (control)
  			m_freem(control);
  		if (mp)
--- //depot/vendor/freebsd/src/sys/nfsserver/nfs_srvsock.c	2006/04/06 23:35:17
+++ //depot/user/rwatson/socleanup/src/sys/nfsserver/nfs_srvsock.c	2006/07/23 15:32:54
@@ -466,8 +466,7 @@
  		auio.uio_resid = 1000000000;
  		flags = MSG_DONTWAIT;
  		NFSD_UNLOCK();
-		error = so->so_proto->pr_usrreqs->pru_soreceive
-			(so, &nam, &auio, &mp, NULL, &flags);
+		error = soreceive(so, &nam, &auio, &mp, NULL, &flags);
  		NFSD_LOCK();
  		if (error || mp == NULL) {
  			if (error == EWOULDBLOCK)
@@ -503,8 +502,7 @@
  			auio.uio_resid = 1000000000;
  			flags = MSG_DONTWAIT;
  			NFSD_UNLOCK();
-			error = so->so_proto->pr_usrreqs->pru_soreceive
-				(so, &nam, &auio, &mp, NULL, &flags);
+			error = soreceive(so, &nam, &auio, &mp, NULL, &flags);
  			if (mp) {
  				struct nfsrv_rec *rec;
  				rec = malloc(sizeof(struct nfsrv_rec),
@@ -785,8 +783,7 @@
  	else
  		flags = 0;

-	error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0,
-						     flags, curthread/*XXX*/);
+	error = sosend(so, sendnam, 0, top, 0, flags, curthread/*XXX*/);
  	if (error == ENOBUFS && so->so_type == SOCK_DGRAM)
  		error = 0;

--- //depot/vendor/freebsd/src/sys/sys/protosw.h	2006/07/14 10:06:50
+++ //depot/user/rwatson/socleanup/src/sys/sys/protosw.h	2006/07/23 15:32:54
@@ -228,15 +228,6 @@
  	int	(*pru_sense)(struct socket *so, struct stat *sb);
  	int	(*pru_shutdown)(struct socket *so);
  	int	(*pru_sockaddr)(struct socket *so, struct sockaddr **nam);
- 
-	/*
-	 * These four added later, so they are out of order.  They are used
-	 * for shortcutting (fast path input/output) in some protocols.
-	 * XXX - that's a lie, they are not implemented yet
-	 * Rather than calling sosend() etc. directly, calls are made
-	 * through these entry points.  For protocols which still use
-	 * the generic code, these just point to those routines.
-	 */
  	int	(*pru_sosend)(struct socket *so, struct sockaddr *addr,
  		    struct uio *uio, struct mbuf *top, struct mbuf *control,
  		    int flags, struct thread *td);
--- //depot/vendor/freebsd/src/sys/sys/socketvar.h	2006/06/17 22:50:57
+++ //depot/user/rwatson/socleanup/src/sys/sys/socketvar.h	2006/07/23 15:32:54
@@ -532,6 +532,9 @@
  	    struct thread *td);
  int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
  	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
+int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
+	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
+	    int *flagsp);
  int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
  void	sorflush(struct socket *so);
  int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
@@ -540,6 +543,9 @@
  int	sosend_dgram(struct socket *so, struct sockaddr *addr,
  	    struct uio *uio, struct mbuf *top, struct mbuf *control,
  	    int flags, struct thread *td);
+int	sosend_generic(struct socket *so, struct sockaddr *addr,
+	    struct uio *uio, struct mbuf *top, struct mbuf *control,
+	    int flags, struct thread *td);
  int	sosetopt(struct socket *so, struct sockopt *sopt);
  int	soshutdown(struct socket *so, int how);
  void	sotoxsocket(struct socket *so, struct xsocket *xso);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20060723171734.K35186>