Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 2 Feb 2001 01:07:25 +0000
From:      Tony Finch <dot@dotat.at>
To:        Garrett Wollman <wollman@khavrinen.lcs.mit.edu>
Cc:        freebsd-net@FreeBSD.ORG
Subject:   proper uncorking for TCP_NOPUSH, was Re: sendfile()
Message-ID:  <20010202010725.Z70673@hand.dotat.at>
In-Reply-To: <20010201230644.W70673@hand.dotat.at>
References:  <1217774688.20010201133139@163.net> <20010201023825.A71975@xor.obsecurity.org> <20010201180010.Q70673@hand.dotat.at> <200102012010.PAA79234@khavrinen.lcs.mit.edu> <20010201230644.W70673@hand.dotat.at>

next in thread | previous in thread | raw e-mail | index | archive | help
Tony Finch <dot@dotat.at> wrote:
>Garrett Wollman <wollman@khavrinen.lcs.mit.edu> wrote:
>>Tony Finch <dot@dotat.at> wrote:
>>>
>>> For this reason turning off TCP_CORK pushes out queued data, but
>>> this isn't the case for TCP_NOPUSH.
>>
>>This is a long-standing bug.  You are welcome to fix it.
>
>I've been looking at this and it seems to me to be simply a case of
>calling tcp_output() from tcp_ctloutput() if TF_NOPUSH is cleared.

OK, apart from my previous patch containing dumb cut&paste errors it
works :-)

I have a program (below) which forks; the parent listens for a
connection and then reads from it reporting the size of the reads; the
child connects to the parent, sets TCP_NOPUSH, writes 10000 single
bytes, unsets TCP_NOPUSH, sleeps for 10 seconds, then exits.

On an upatched FreeBSD the following happens. Note that the
single-byte writes get coalesced nicely. Note also that there is a six
second delay between the writer finishing and the readere receiving
the data.

:; ./a.out 
981075049.891658 reader init
981075050.898416 writer init
981075050.899179 reader go
981075050.902703 writer go
981075050.939887 write finished
981075055.898738 read 8192
981075055.899084 read 1808
981075060.948514 writer exit
981075060.949197 read 0
981075060.949237 reader exit

On a patched FreeBSD this happens. The reader gets the data as soon as
the writer turns off TCP_NOPUSH :-)

:; ./a.out 
981075103.647630 reader init
981075104.654745 writer init
981075104.655310 reader go
981075104.655433 writer go
981075104.690353 write finished
981075104.690863 read 8192
981075104.691037 read 1808
981075114.695098 writer exit
981075114.695640 read 0
981075114.695761 reader exit

Proper patch below.

Tony.
-- 
f.a.n.finch    fanf@covalent.net    dot@dotat.at
"Dead! And yet there he stands!"


Index: tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.51
diff -u -r1.51 tcp_usrreq.c
--- tcp_usrreq.c	2000/01/09 19:17:28	1.51
+++ tcp_usrreq.c	2001/02/02 00:21:01
@@ -896,7 +896,6 @@
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 		case TCP_NOOPT:
-		case TCP_NOPUSH:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
@@ -909,9 +908,6 @@
 			case TCP_NOOPT:
 				opt = TF_NOOPT;
 				break;
-			case TCP_NOPUSH:
-				opt = TF_NOPUSH;
-				break;
 			default:
 				opt = 0; /* dead code to fool gcc */
 				break;
@@ -921,6 +917,20 @@
 				tp->t_flags |= opt;
 			else
 				tp->t_flags &= ~opt;
+			break;
+
+		case TCP_NOPUSH:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+
+			if (optval)
+				tp->t_flags |= TF_NOPUSH;
+			else {
+				tp->t_flags &= ~TF_NOPUSH;
+				error = tcp_output(tp);
+			}
 			break;
 
 		case TCP_MAXSEG:


#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <err.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define PORT_FOO 55555

static void
report(const char *fmt, ...) {
	struct timeval tv;
	va_list ap;
	char buf[80];

	va_start(ap, fmt);
	if(gettimeofday(&tv, NULL)) err(1, "gettimeofday");
	vsnprintf(buf, sizeof(buf), fmt, ap);
	printf("%ld.%06ld %s\n", tv.tv_sec, tv.tv_usec, buf);
	va_end(ap);
}

int
main(void) {
	int i, r, s;
	struct sockaddr_in addr;
	char buf[8192];

	setbuf(stdout, NULL);

	bzero(&addr, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(PORT_FOO);
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	switch(fork()) {
	case(-1):
		err(1, "fork");
	case(0):
		/* child */
		sleep(1);
		report("writer init");
		s = socket(PF_INET, SOCK_STREAM, 0);
		if(s < 0) err(1, "socket");
		i = 1; r = setsockopt(s, IPPROTO_TCP, TCP_NOPUSH, &i, sizeof(i));
		if(r < 0) err(1, "setsockopt");
		r = connect(s, (struct sockaddr *)&addr, sizeof(addr));
		if(r < 0) err(1, "connect");
		report("writer go");
		*buf = 0;
		for(i = 0; i < 10000; i++) {
			r = write(s, buf, 1);
			if(r < 0) err(1, "write");
		}
		report("write finished");
		i = 0; r = setsockopt(s, IPPROTO_TCP, TCP_NOPUSH, &i, sizeof(i));
		if(r < 0) err(1, "setsockopt");
		sleep(10);
		report("writer exit");
		exit(0);
	default:
		/* parent */
		report("reader init");
		s = socket(PF_INET, SOCK_STREAM, 0);
		if(s < 0) err(1, "socket");
		i = 1; r = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i));
		if(r < 0) err(1, "setsockopt");
		r = bind(s, (struct sockaddr *)&addr, sizeof(addr));
		if(r < 0) err(1, "bind");
		r = listen(s, SOMAXCONN);
		if(r < 0) err(1, "listen");
		s = accept(s, (struct sockaddr *)&addr, &i);
		if(s < 0) err(1, "accept");
		report("reader go");
		do {
			r = read(s, buf, sizeof(buf));
			if(r < 0) err(1, "read");
			report("read %d", r);
		} while(r);
		report("reader exit");
		exit(0);
	}
	return(0);
}


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010202010725.Z70673>