Date: Mon, 20 Oct 2003 16:28:07 -0700 (PDT) From: Sam Leffler <sam@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 40035 for review Message-ID: <200310202328.h9KNS75u099074@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=40035 Change 40035 by sam@sam_ebb on 2003/10/20 16:27:57 jason thorpe's tail optimizations for stream sockets, as passed on by ps/jayanth and fixed by me add global SOCKBUF_DEBUG option to control debugging code Affected files ... .. //depot/projects/netperf/sys/conf/NOTES#12 edit .. //depot/projects/netperf/sys/conf/options#11 edit .. //depot/projects/netperf/sys/kern/uipc_socket.c#3 edit .. //depot/projects/netperf/sys/kern/uipc_socket2.c#3 edit .. //depot/projects/netperf/sys/netinet/tcp_input.c#5 edit .. //depot/projects/netperf/sys/netinet/tcp_usrreq.c#2 edit .. //depot/projects/netperf/sys/sys/socketvar.h#3 edit Differences ... ==== //depot/projects/netperf/sys/conf/NOTES#12 (text+ko) ==== @@ -2282,6 +2282,7 @@ # Debug options options BUS_DEBUG # enable newbus debugging options DEBUG_VFS_LOCKS # enable vfs lock debugging +options SOCKBUF_DEBUG # enable sockbuf last record/mb tail checking ##################################################################### # SYSV IPC KERNEL PARAMETERS ==== //depot/projects/netperf/sys/conf/options#11 (text+ko) ==== @@ -618,6 +618,8 @@ # XXX bogusly global. DEVICE_POLLING opt_global.h +SOCKBUF_DEBUG opt_global.h + # options for ubsec driver UBSEC_DEBUG opt_ubsec.h UBSEC_RNDTEST opt_ubsec.h ==== //depot/projects/netperf/sys/kern/uipc_socket.c#3 (text+ko) ==== @@ -887,6 +887,8 @@ error = EWOULDBLOCK; goto release; } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); @@ -897,6 +899,8 @@ dontblock: if (uio->uio_td) uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, @@ -938,12 +942,32 @@ } } if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + /* + * If nextrecord == NULL (this is a single chain), + * then sb_lastrecord may not be valid here if m + * was changed earlier. + */ + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m, + ("receive tailq 1")); + so->so_rcv.sb_lastrecord = m; + } + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; + } else { + if ((flags & MSG_PEEK) == 0) { + KASSERT(so->so_rcv.sb_mb == m,("receive tailq 2")); + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { @@ -970,6 +994,8 @@ * block interrupts again. */ if (mp == 0) { + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); splx(s); #ifdef ZERO_COPY_SOCKETS if (so_zero_copy_receive) { @@ -1017,8 +1043,16 @@ so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } - if (m) + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) @@ -1063,6 +1097,8 @@ */ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); @@ -1081,8 +1117,21 @@ (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == 0) + if (m == 0) { + /* + * First part is an inline SB_EMPTY_FIXUP(). Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. + */ so->so_rcv.sb_mb = nextrecord; + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_mbtail = NULL; + so->so_rcv.sb_lastrecord = NULL; + } else if (nextrecord->m_nextpkt == NULL) + so->so_rcv.sb_lastrecord = nextrecord; + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } ==== //depot/projects/netperf/sys/kern/uipc_socket2.c#3 (text+ko) ==== @@ -468,6 +468,60 @@ * or sbdroprecord() when the data is acknowledged by the peer. */ +#ifdef SOCKBUF_DEBUG +void +sblastrecordchk(struct sockbuf *sb, const char *file, int line) +{ + struct mbuf *m = sb->sb_mb; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + if (m != sb->sb_lastrecord) { + printf("%s: sb_mb %p sb_lastrecord %p last %p\n", + __func__, sb->sb_mb, sb->sb_lastrecord, m); + printf("packet chain:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) + printf("\t%p\n", m); + panic("%s from %s:%u", __func__, file, line); + } +} + +void +sblastmbufchk(struct sockbuf *sb, const char *file, int line) +{ + struct mbuf *m = sb->sb_mb; + struct mbuf *n; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + while (m && m->m_next) + m = m->m_next; + + if (m != sb->sb_mbtail) { + printf("%s: sb_mb %p sb_mbtail %p last %p\n", + __func__, sb->sb_mb, sb->sb_mbtail, m); + printf("packet tree:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { + printf("\t"); + for (n = m; n != NULL; n = n->m_next) + printf("%p ", n); + printf("\n"); + } + panic("%s from %s:%u", __func__, file, line); + } +} +#endif /* SOCKBUF_DEBUG */ + +#define SBLINKRECORD(sb, m0) do { \ + if ((sb)->sb_lastrecord != NULL) \ + (sb)->sb_lastrecord->m_nextpkt = (m0); \ + else \ + (sb)->sb_mb = (m0); \ + (sb)->sb_lastrecord = (m0); \ +} while (/*CONSTCOND*/0) + /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated @@ -483,6 +537,7 @@ if (m == 0) return; + SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) @@ -493,8 +548,53 @@ return; } } while (n->m_next && (n = n->m_next)); + } else { + /* + * XXX Would like to simply use sb_mbtail here, but + * XXX I need to verify that I won't miss an EOR that + * XXX way. + */ + if ((n = sb->sb_lastrecord) != NULL) { + do { + if (n->m_flags & M_EOR) { + sbappendrecord(sb, m); /* XXXXXX!!!! */ + return; + } + } while (n->m_next && (n = n->m_next)); + } else { + /* + * If this is the first record in the socket buffer, + * it's also the last record. + */ + sb->sb_lastrecord = m; + } } sbcompress(sb, m, n); + SBLASTRECORDCHK(sb); +} + +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). + */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + + KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); + KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); + + SBLASTMBUFCHK(sb); + +#ifdef MBUFTRACE + m_claim(m, sb->sb_mowner); +#endif + + sbcompress(sb, m, sb->sb_mbtail); + + sb->sb_lastrecord = sb->sb_mb; + SBLASTRECORDCHK(sb); } #ifdef SOCKBUF_DEBUG @@ -516,7 +616,7 @@ } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { - printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, + printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } @@ -545,6 +645,8 @@ * Note this permits zero length records. */ sballoc(sb, m0); + SBLASTRECORDCHK(sb); + SBLINKRECORD(sb, m0); if (m) m->m_nextpkt = m0; else @@ -616,7 +718,7 @@ struct sockaddr *asa; struct mbuf *m0, *control; { - struct mbuf *m, *n; + struct mbuf *m, *n, *nlast; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) @@ -640,15 +742,16 @@ else control = m0; m->m_next = control; - for (n = m; n; n = n->m_next) + for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = m; - } else - sb->sb_mb = m; + sballoc(sb, n); + nlast = n; + SBLINKRECORD(sb, m); + + sb->sb_mbtail = nlast; + SBLASTMBUFCHK(sb); + + SBLASTRECORDCHK(sb); return (1); } @@ -657,7 +760,7 @@ struct sockbuf *sb; struct mbuf *control, *m0; { - struct mbuf *m, *n; + struct mbuf *m, *n, *mlast; int space; if (control == 0) @@ -666,15 +769,19 @@ if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ - for (m = control; m; m = m->m_next) + + SBLASTRECORDCHK(sb); + + for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = control; - } else - sb->sb_mb = control; + sballoc(sb, m); + mlast = m; + SBLINKRECORD(sb, control); + + sb->sb_mbtail = mlast; + SBLASTMBUFCHK(sb); + + SBLASTRECORDCHK(sb); return (1); } @@ -697,6 +804,8 @@ (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { + if (sb->sb_lastrecord == m) + sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } @@ -720,6 +829,7 @@ n->m_next = m; else sb->sb_mb = m; + sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; @@ -732,6 +842,7 @@ else printf("semi-panic: sbcompress\n"); } + SBLASTMBUFCHK(sb); } /* @@ -800,6 +911,18 @@ m->m_nextpkt = next; } else sb->sb_mb = next; + /* + * First part is an inline SB_EMPTY_FIXUP(). Second part + * makes sure sb_lastrecord is up-to-date if we dropped + * part of the last record. + */ + m = sb->sb_mb; + if (m == NULL) { + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + } else if (m->m_nextpkt == NULL) { + sb->sb_lastrecord = m; + } } /* @@ -820,6 +943,7 @@ m = m_free(m); } while (m); } + SB_EMPTY_FIXUP(sb); } /* ==== //depot/projects/netperf/sys/netinet/tcp_input.c#5 (text+ko) ==== @@ -296,7 +296,7 @@ if (so->so_state & SS_CANTRCVMORE) m_freem(q->tqe_m); else - sbappend(&so->so_rcv, q->tqe_m); + sbappendstream(&so->so_rcv, q->tqe_m); FREE(q, M_TSEGQ); q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); @@ -1124,7 +1124,7 @@ m_freem(m); } else { m_adj(m, drop_hdrlen); /* delayed header drop */ - sbappend(&so->so_rcv, m); + sbappendstream(&so->so_rcv, m); } sorwakeup(so); if (DELAY_ACK(tp)) { @@ -2178,7 +2178,7 @@ if (so->so_state & SS_CANTRCVMORE) m_freem(m); else - sbappend(&so->so_rcv, m); + sbappendstream(&so->so_rcv, m); sorwakeup(so); } else { thflags = tcp_reass(tp, th, &tlen, m); ==== //depot/projects/netperf/sys/netinet/tcp_usrreq.c#2 (text+ko) ==== @@ -685,7 +685,7 @@ m_freem(control); /* empty control, just free it */ } if (!(flags & PRUS_OOB)) { - sbappend(&so->so_snd, m); + sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, @@ -734,7 +734,7 @@ * of data past the urgent section. * Otherwise, snd_up should be one lower. */ - sbappend(&so->so_snd, m); + sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, ==== //depot/projects/netperf/sys/sys/socketvar.h#3 (text+ko) ==== @@ -102,6 +102,9 @@ struct selinfo sb_sel; /* process selecting read/write */ #define sb_startzero sb_mb struct mbuf *sb_mb; /* the mbuf chain */ + struct mbuf *sb_mbtail; /* the last mbuf in the chain */ + struct mbuf *sb_lastrecord; /* first mbuf of last record in + * socket buffer */ u_int sb_cc; /* actual chars in buffer */ u_int sb_hiwat; /* max actual char count */ u_int sb_mbcnt; /* chars of mbufs used */ @@ -137,6 +140,13 @@ } *so_accf; }; +#define SB_EMPTY_FIXUP(sb) do { \ + if ((sb)->sb_mb == NULL) { \ + (sb)->sb_mbtail = NULL; \ + (sb)->sb_lastrecord = NULL; \ + } \ +} while (/*CONSTCOND*/0) + /* * Socket state bits. */ @@ -353,6 +363,7 @@ int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type); int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); void sbappend(struct sockbuf *sb, struct mbuf *m); +void sbappendstream(struct sockbuf *sb, struct mbuf *m); int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, @@ -422,6 +433,17 @@ void sotoxsocket(struct socket *so, struct xsocket *xso); void sowakeup(struct socket *so, struct sockbuf *sb); +#ifdef SOCKBUF_DEBUG +void sblastrecordchk(struct sockbuf *, const char *, int); +#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) + +void sblastmbufchk(struct sockbuf *, const char *, int); +#define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) +#else +#define SBLASTRECORDCHK(sb) /* nothing */ +#define SBLASTMBUFCHK(sb) /* nothing */ +#endif /* SOCKBUF_DEBUG */ + /* * Accept filter functions (duh). */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200310202328.h9KNS75u099074>