Date: Fri, 2 Feb 2001 01:07:25 +0000 From: Tony Finch <dot@dotat.at> To: Garrett Wollman <wollman@khavrinen.lcs.mit.edu> Cc: freebsd-net@FreeBSD.ORG Subject: proper uncorking for TCP_NOPUSH, was Re: sendfile() Message-ID: <20010202010725.Z70673@hand.dotat.at> In-Reply-To: <20010201230644.W70673@hand.dotat.at> References: <1217774688.20010201133139@163.net> <20010201023825.A71975@xor.obsecurity.org> <20010201180010.Q70673@hand.dotat.at> <200102012010.PAA79234@khavrinen.lcs.mit.edu> <20010201230644.W70673@hand.dotat.at>
next in thread | previous in thread | raw e-mail | index | archive | help
Tony Finch <dot@dotat.at> wrote: >Garrett Wollman <wollman@khavrinen.lcs.mit.edu> wrote: >>Tony Finch <dot@dotat.at> wrote: >>> >>> For this reason turning off TCP_CORK pushes out queued data, but >>> this isn't the case for TCP_NOPUSH. >> >>This is a long-standing bug. You are welcome to fix it. > >I've been looking at this and it seems to me to be simply a case of >calling tcp_output() from tcp_ctloutput() if TF_NOPUSH is cleared. OK, apart from my previous patch containing dumb cut&paste errors it works :-) I have a program (below) which forks; the parent listens for a connection and then reads from it reporting the size of the reads; the child connects to the parent, sets TCP_NOPUSH, writes 10000 single bytes, unsets TCP_NOPUSH, sleeps for 10 seconds, then exits. On an upatched FreeBSD the following happens. Note that the single-byte writes get coalesced nicely. Note also that there is a six second delay between the writer finishing and the readere receiving the data. :; ./a.out 981075049.891658 reader init 981075050.898416 writer init 981075050.899179 reader go 981075050.902703 writer go 981075050.939887 write finished 981075055.898738 read 8192 981075055.899084 read 1808 981075060.948514 writer exit 981075060.949197 read 0 981075060.949237 reader exit On a patched FreeBSD this happens. The reader gets the data as soon as the writer turns off TCP_NOPUSH :-) :; ./a.out 981075103.647630 reader init 981075104.654745 writer init 981075104.655310 reader go 981075104.655433 writer go 981075104.690353 write finished 981075104.690863 read 8192 981075104.691037 read 1808 981075114.695098 writer exit 981075114.695640 read 0 981075114.695761 reader exit Proper patch below. Tony. -- f.a.n.finch fanf@covalent.net dot@dotat.at "Dead! And yet there he stands!" Index: tcp_usrreq.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.51 diff -u -r1.51 tcp_usrreq.c --- tcp_usrreq.c 2000/01/09 19:17:28 1.51 +++ tcp_usrreq.c 2001/02/02 00:21:01 @@ -896,7 +896,6 @@ switch (sopt->sopt_name) { case TCP_NODELAY: case TCP_NOOPT: - case TCP_NOPUSH: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) @@ -909,9 +908,6 @@ case TCP_NOOPT: opt = TF_NOOPT; break; - case TCP_NOPUSH: - opt = TF_NOPUSH; - break; default: opt = 0; /* dead code to fool gcc */ break; @@ -921,6 +917,20 @@ tp->t_flags |= opt; else tp->t_flags &= ~opt; + break; + + case TCP_NOPUSH: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + break; + + if (optval) + tp->t_flags |= TF_NOPUSH; + else { + tp->t_flags &= ~TF_NOPUSH; + error = tcp_output(tp); + } break; case TCP_MAXSEG: #include <sys/types.h> #include <sys/socket.h> #include <sys/time.h> #include <netinet/in.h> #include <netinet/tcp.h> #include <err.h> #include <stdarg.h> #include <stdio.h> #include <string.h> #include <unistd.h> #define PORT_FOO 55555 static void report(const char *fmt, ...) { struct timeval tv; va_list ap; char buf[80]; va_start(ap, fmt); if(gettimeofday(&tv, NULL)) err(1, "gettimeofday"); vsnprintf(buf, sizeof(buf), fmt, ap); printf("%ld.%06ld %s\n", tv.tv_sec, tv.tv_usec, buf); va_end(ap); } int main(void) { int i, r, s; struct sockaddr_in addr; char buf[8192]; setbuf(stdout, NULL); bzero(&addr, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = htons(PORT_FOO); addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); switch(fork()) { case(-1): err(1, "fork"); case(0): /* child */ sleep(1); report("writer init"); s = socket(PF_INET, SOCK_STREAM, 0); if(s < 0) err(1, "socket"); i = 1; r = setsockopt(s, IPPROTO_TCP, TCP_NOPUSH, &i, sizeof(i)); if(r < 0) err(1, "setsockopt"); r = connect(s, (struct sockaddr *)&addr, sizeof(addr)); if(r < 0) err(1, "connect"); report("writer go"); *buf = 0; for(i = 0; i < 10000; i++) { r = write(s, buf, 1); if(r < 0) err(1, "write"); } report("write finished"); i = 0; r = setsockopt(s, IPPROTO_TCP, TCP_NOPUSH, &i, sizeof(i)); if(r < 0) err(1, "setsockopt"); sleep(10); report("writer exit"); exit(0); default: /* parent */ report("reader init"); s = socket(PF_INET, SOCK_STREAM, 0); if(s < 0) err(1, "socket"); i = 1; r = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i)); if(r < 0) err(1, "setsockopt"); r = bind(s, (struct sockaddr *)&addr, sizeof(addr)); if(r < 0) err(1, "bind"); r = listen(s, SOMAXCONN); if(r < 0) err(1, "listen"); s = accept(s, (struct sockaddr *)&addr, &i); if(s < 0) err(1, "accept"); report("reader go"); do { r = read(s, buf, sizeof(buf)); if(r < 0) err(1, "read"); report("read %d", r); } while(r); report("reader exit"); exit(0); } return(0); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-net" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010202010725.Z70673>