Date: Wed, 26 Aug 1998 21:55:19 -0400 (EDT)
From: Garrett Wollman <wollman@khavrinen.lcs.mit.edu>
To: net@FreeBSD.ORG
Subject: Next big network patch: specialized sosend for TCP
Message-ID: <199808270155.VAA07365@khavrinen.lcs.mit.edu>
next in thread | raw e-mail | index | archive | help
Here's the next patch that's going into the TCP stack. I am running
this right now, so I'm certain it's not completely bogus, but have not
stress-tested it as yet. It does appear to be somewhat faster (having
eliminated about a dozen branches), but -current is a very hostile
environment for microbenchmarks of the sort I would usually use.
Once again, any comments would be appreciated.
-GAWollman
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.38
diff -u -r1.38 tcp_usrreq.c
--- tcp_usrreq.c 1998/08/23 03:07:15 1.38
+++ tcp_usrreq.c 1998/08/27 01:44:26
@@ -73,6 +73,9 @@
struct proc *));
static struct tcpcb *
tcp_disconnect __P((struct tcpcb *));
+static int tcp_sosend __P((struct socket *, struct sockaddr *,
+ struct uio *, struct mbuf *, struct mbuf *,
+ int, struct proc *));
static struct tcpcb *
tcp_usrclosed __P((struct tcpcb *));
@@ -325,6 +328,10 @@
/*
* Do a send by putting data in output queue and updating urgent
* marker if URG set. Possibly send more data.
+ *
+ * XXX - this routine is really only here for the benefit of NFS.
+ * Somebody who knows the NFS code should figure out why NFS
+ * is going through here and where it should go.
*/
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
@@ -336,14 +343,12 @@
struct tcpcb *tp;
COMMON_START();
- if (control && control->m_len) {
- m_freem(control); /* XXX shouldn't caller do this??? */
- if (m)
- m_freem(m);
- error = EINVAL;
- goto out;
- }
-
+ /*
+ * We used to check for control information here, but
+ * tcp_sosend() doesn't call here, and any direct callers (i.e., NFS)
+ * should know enough to refrain from sending any since TCP
+ * has never supported control information.
+ */
if(!(flags & PRUS_OOB)) {
sbappend(&so->so_snd, m);
if (nam && tp->t_state < TCPS_SYN_SENT) {
@@ -459,7 +464,7 @@
tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
- in_setsockaddr, sosend, soreceive, sopoll
+ in_setsockaddr, tcp_sosend, soreceive, sopoll
};
/*
@@ -786,5 +791,230 @@
tp->t_timer[TCPT_2MSL] = tcp_maxidle;
}
return (tp);
+}
+
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <sys/uio.h>
+
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
+/*
+ * Specialized from kern/uipc_socket.c:sosend(). This isn't
+ * even close to fully optimized, but it has been run through
+ * a round of invisible constant propagation and dead code
+ * elimination. The (uio != 0) and (top != 0) should probably
+ * be separated by a single major branch, since the code
+ * is well interspersed at present (which is probably bad
+ * for branch prediction).
+ *
+ * Returns nonzero on error, timeout or signal; callers
+ * must check for short counts if EINTR/ERESTART are returned.
+ * Data and control buffers are freed on return.
+ */
+static int
+tcp_sosend(so, addr, uio, top, control, flags, p)
+ register struct socket *so;
+ struct sockaddr *addr;
+ struct uio *uio;
+ struct mbuf *top;
+ struct mbuf *control;
+ int flags;
+ struct proc *p;
+{
+ struct mbuf **mp;
+ register struct mbuf *m;
+ register long space, len, resid;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ int error, s, mlen;
+ TCPDEBUG0;
+
+ /*
+ * I believe that a socket can never become ``bare''
+ * during the execution of this routine. (I don't think
+ * that a socket can ever become ``bare'' except for brief
+ * moments during initialization and rundown, but haven't
+ * proven that.) (Of course, I haven't proven this, either.
+ * We'll see.)
+ */
+ if ((inp = sotoinpcb(so)) == 0) {
+ error = EINVAL;
+ goto release;
+ }
+ tp = intotcpcb(inp);
+
+ if (uio)
+ resid = uio->uio_resid;
+ else
+ resid = top->m_pkthdr.len;
+ /*
+ * In theory resid should be unsigned.
+ * However, space must be signed, as it might be less than 0
+ * if we over-committed, and we must use a signed comparison
+ * of space and resid. On the other hand, a negative resid
+ * causes us to loop sending 0-length segments to the protocol.
+ * Also bail early if we get control information -- TCP doesn't
+ * support that.
+ */
+ if (resid < 0 || (control && control->m_len) || inp == 0) {
+ error = EINVAL;
+ goto out;
+ }
+
+ if (p)
+ p->p_stats->p_ru.ru_msgsnd++;
+
+restart:
+ error = sblock(&so->so_snd, SBLOCKWAIT(flags));
+ if (error)
+ goto out;
+ do {
+ s = splnet();
+#define snderr(errno) do { error = errno; splx(s); goto release; } while (0)
+ if (so->so_state & SS_CANTSENDMORE)
+ snderr(EPIPE);
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto release;
+ }
+ if ((so->so_state & SS_ISCONNECTED) == 0
+ && addr == 0)
+ snderr(ENOTCONN);
+
+ space = sbspace(&so->so_snd);
+ if (flags & MSG_OOB)
+ space += 1024; /* XXX totally arbitrary */
+ if (uio == 0 && resid > so->so_snd.sb_hiwat)
+ snderr(EMSGSIZE);
+ if (space < resid && uio && space < so->so_snd.sb_lowat) {
+ if (so->so_state & SS_NBIO)
+ snderr(EWOULDBLOCK);
+ sbunlock(&so->so_snd);
+ error = sbwait(&so->so_snd);
+ splx(s);
+ if (error)
+ goto out;
+ goto restart;
+ }
+ splx(s);
+ mp = &top;
+ do {
+ if (uio == NULL) {
+ /*
+ * Data is prepackaged in "top".
+ */
+ resid = 0;
+ } else do {
+ if (top == 0) {
+ MGETHDR(m, M_WAIT, MT_DATA);
+ mlen = MHLEN;
+ m->m_pkthdr.len = 0;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ } else {
+ MGET(m, M_WAIT, MT_DATA);
+ mlen = MLEN;
+ }
+ if (resid >= MINCLSIZE) {
+ MCLGET(m, M_WAIT);
+ if ((m->m_flags & M_EXT) == 0)
+ goto nopages;
+ mlen = MCLBYTES;
+ len = min(min(mlen, resid), space);
+ } else {
+nopages:
+ len = min(min(mlen, resid), space);
+ }
+ space -= len;
+ error = uiomove(mtod(m, caddr_t), (int)len, uio);
+ resid = uio->uio_resid;
+ m->m_len = len;
+ *mp = m;
+ top->m_pkthdr.len += len;
+ if (error)
+ goto release;
+ mp = &m->m_next;
+ if (resid <= 0) {
+ break;
+ }
+ } while (space > 0);
+ s = splnet(); /* XXX */
+ TCPDEBUG1();
+ /*
+ * XXX -- should be possible to perform this check
+ * out of the loop.
+ */
+ if ((flags & MSG_OOB) && sbspace(&so->so_snd) < -512) {
+ splx(s);
+ snderr(ENOBUFS);
+ }
+ sbappend(&so->so_snd, top);
+
+ /*
+ * Do implied connect if not yet connected,
+ * initialize window to default value, and
+ * initialize maxseg/maxopd using peer's cached
+ * MSS.
+ */
+ if (addr && tp->t_state < TCPS_SYN_SENT) {
+ error = tcp_connect(tp, addr, p);
+ if (error)
+ goto out;
+ tp->snd_wnd = TTCP_CLIENT_SND_WND;
+ tcp_mss(tp, -1);
+ }
+
+ if (flags & MSG_OOB) {
+ /*
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section.
+ * Otherwise, snd_up should be one lower.
+ */
+ tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+ tp->t_force = 1;
+ error = tcp_output(tp);
+ tp->t_force = 0;
+ TCPDEBUG2(PRU_SEND_OOB);
+ } else if ((flags & MSG_EOF) && resid <= 0) {
+ socantsendmore(so);
+ tp = tcp_usrclosed(tp);
+#ifdef DIAGNOSTIC
+ /*
+ * The only way tcp_usrclosed() can cause the
+ * tcpcb to go away entirely is if it was either
+ * CLOSED or LISTENing. In either state, we
+ * should never have gotten this far.
+ */
+ if (tp == 0)
+ panic("tcp_sosend: socket already closed");
+#endif
+ error = tcp_output(tp);
+ TCPDEBUG2(PRU_SEND_EOF);
+ } else {
+ error = tcp_output(tp);
+ TCPDEBUG2(PRU_SEND);
+ }
+ splx(s);
+ top = 0;
+ mp = &top;
+ if (error)
+ goto release;
+ } while (resid && space > 0);
+ } while (resid);
+
+release:
+ sbunlock(&so->so_snd);
+out:
+ if (top)
+ m_freem(top);
+ if (control)
+ m_freem(control);
+ return (error);
}
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199808270155.VAA07365>
