Date: Thu, 08 Feb 2001 22:27:25 +0000 From: Tony Finch <dot@dotat.at> To: FreeBSD-gnats-submit@freebsd.org Subject: kern/24959: proper TCP_NOPUSH/TCP_CORK compatibility Message-ID: <E14QzXB-00024t-00@hand.dotat.at>
next in thread | raw e-mail | index | archive | help
>Number: 24959
>Category: kern
>Synopsis: proper TCP_NOPUSH/TCP_CORK compatibility
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: change-request
>Submitter-Id: current-users
>Arrival-Date: Thu Feb 08 14:30:01 PST 2001
>Closed-Date:
>Last-Modified:
>Originator: Tony Finch
>Release: FreeBSD 4.2-STABLE i386
>Organization:
Covalent Technologies, Inc.
>Environment:
FreeBSD hand.dotat.at 4.2-STABLE FreeBSD 4.2-STABLE #2: Thu Feb 8 08:45:48 GMT 2001 fanf@hand.dotat.at:/FreeBSD/obj/FreeBSD/releng4/sys/DELL-Latitude-CSx i386
>Description:
Jonathan Lemon recently committed a fix to the implementation of
TCP_NOPUSH that expedites the output of the last packet in a sequence
(rev. 1.53 of tcp_usrreq.c). However there are a number of problems
with that commit, as follows:
* the change hsan't been documented
* although the new functionality is similar to Linux's
TCP_CORK, the Linuxulator hasn't been updated to implement
that support.
* TCP_NOPUSH has additional T/TCP-related semantics that may be
undesirable in an application that only wants the TCP_CORK
semantics. If the option is set on a listening socket then it
enables TAO.
This patch introduces a new TCP option called TCP_CORK which is
intended to be exactly the same as the Linux version. The old
tcp_output part of the TCP_NOPUSH functionality is renamed to use
TCP_CORK. TCP_NOPUSH is now implemented as a superset of TCP_CORK
that also includes the T/TCP functionality in tcp_input.
I've included two patches below, one against -STABLE and one against
-CURRENT. They're the same apart from the change in tcp_usrreq.c in
-CURRENT.
>How-To-Repeat:
>Fix:
--------------------------------------------------------------------------------
This patch is against RELENG_4
Index: alpha/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/alpha/linux/linux.h,v
retrieving revision 1.46.2.2
diff -u -r1.46.2.2 linux.h
--- alpha/linux/linux.h 2000/11/22 15:22:43 1.46.2.2
+++ alpha/linux/linux.h 2001/02/05 23:35:51
@@ -377,6 +377,10 @@
#define LINUX_IP_ADD_MEMBERSHIP 35
#define LINUX_IP_DROP_MEMBERSHIP 36
+#define LINUX_TCP_NODELAY 1
+#define LINUX_TCP_MAXSEG 2
+#define LINUX_TCP_CORK 3
+
struct linux_sockaddr {
u_short sa_family;
char sa_data[14];
Index: compat/linux/linux_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/compat/linux/linux_socket.c,v
retrieving revision 1.19.2.3
diff -u -r1.19.2.3 linux_socket.c
--- compat/linux/linux_socket.c 2000/11/22 15:39:01 1.19.2.3
+++ compat/linux/linux_socket.c 2001/02/08 21:25:38
@@ -46,6 +46,7 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/tcp.h>
#include <machine/../linux/linux.h>
#ifdef __alpha__
@@ -89,6 +90,21 @@
}
static int
+linux_to_bsd_tcp_sockopt(int opt)
+{
+
+ switch (opt) {
+ case LINUX_TCP_NODELAY:
+ return (TCP_NODELAY);
+ case LINUX_TCP_MAXSEG:
+ return (TCP_MAXSEG);
+ case LINUX_TCP_CORK:
+ return (TCP_CORK);
+ }
+ return (-1);
+}
+
+static int
linux_to_bsd_ip_sockopt(int opt)
{
@@ -768,8 +784,7 @@
name = linux_to_bsd_ip_sockopt(linux_args.optname);
break;
case IPPROTO_TCP:
- /* Linux TCP option values match BSD's */
- name = linux_args.optname;
+ name = linux_to_bsd_tcp_sockopt(linux_args.optname);
break;
default:
name = -1;
Index: i386/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/linux/linux.h,v
retrieving revision 1.41.2.1
diff -u -r1.41.2.1 linux.h
--- i386/linux/linux.h 2000/05/09 17:41:24 1.41.2.1
+++ i386/linux/linux.h 2001/02/05 23:35:24
@@ -375,6 +375,10 @@
#define LINUX_IP_ADD_MEMBERSHIP 35
#define LINUX_IP_DROP_MEMBERSHIP 36
+#define LINUX_TCP_NODELAY 1
+#define LINUX_TCP_MAXSEG 2
+#define LINUX_TCP_CORK 3
+
struct linux_sockaddr {
u_short sa_family;
char sa_data[14];
Index: netinet/tcp.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp.h,v
retrieving revision 1.13.2.2
diff -u -r1.13.2.2 tcp.h
--- netinet/tcp.h 2001/01/09 18:25:18 1.13.2.2
+++ netinet/tcp.h 2001/02/08 21:10:37
@@ -129,7 +129,8 @@
*/
#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
#define TCP_MAXSEG 0x02 /* set maximum segment size */
-#define TCP_NOPUSH 0x04 /* don't push last block of write */
+#define TCP_CORK 0x03 /* don't push last block of write */
+#define TCP_NOPUSH 0x04 /* ditto and also allow TAO on listen */
#define TCP_NOOPT 0x08 /* don't use TCP options */
#endif
Index: netinet/tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.107.2.4
diff -u -r1.107.2.4 tcp_input.c
--- netinet/tcp_input.c 2000/08/16 06:14:23 1.107.2.4
+++ netinet/tcp_input.c 2001/02/08 21:23:29
@@ -817,7 +817,7 @@
#endif
tp = intotcpcb(inp);
tp->t_state = TCPS_LISTEN;
- tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+ tp->t_flags |= tp0->t_flags & (TF_CORK|TF_NOOPT);
/* Compute proper scaling value from buffer space */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
Index: netinet/tcp_output.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.39.2.6
diff -u -r1.39.2.6 tcp_output.c
--- netinet/tcp_output.c 2000/09/13 04:27:06 1.39.2.6
+++ netinet/tcp_output.c 2001/02/08 21:23:07
@@ -280,7 +280,7 @@
goto send;
if (!(tp->t_flags & TF_MORETOCOME) &&
(idle || tp->t_flags & TF_NODELAY) &&
- (tp->t_flags & TF_NOPUSH) == 0 &&
+ (tp->t_flags & TF_CORK) == 0 &&
len + off >= so->so_snd.sb_cc)
goto send;
if (tp->t_force)
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.51
diff -u -r1.51 tcp_usrreq.c
--- netinet/tcp_usrreq.c 2000/01/09 19:17:28 1.51
+++ netinet/tcp_usrreq.c 2001/02/08 21:22:35
@@ -896,6 +896,7 @@
switch (sopt->sopt_name) {
case TCP_NODELAY:
case TCP_NOOPT:
+ case TCP_CORK:
case TCP_NOPUSH:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
@@ -909,8 +910,11 @@
case TCP_NOOPT:
opt = TF_NOOPT;
break;
+ case TCP_CORK:
+ opt = TF_CORK;
+ break;
case TCP_NOPUSH:
- opt = TF_NOPUSH;
+ opt = TF_NOPUSH | TF_CORK;
break;
default:
opt = 0; /* dead code to fool gcc */
@@ -919,8 +923,11 @@
if (optval)
tp->t_flags |= opt;
- else
+ else {
tp->t_flags &= ~opt;
+ if (opt & TF_CORK)
+ error = tcp_output(tp);
+ }
break;
case TCP_MAXSEG:
Index: netinet/tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.56.2.2
diff -u -r1.56.2.2 tcp_var.h
--- netinet/tcp_var.h 2000/08/16 06:14:23 1.56.2.2
+++ netinet/tcp_var.h 2001/02/08 21:25:09
@@ -89,12 +89,13 @@
#define TF_SACK_PERMIT 0x00200 /* other side said I could SACK */
#define TF_NEEDSYN 0x00400 /* send SYN (implicit state) */
#define TF_NEEDFIN 0x00800 /* send FIN (implicit state) */
-#define TF_NOPUSH 0x01000 /* don't push */
+#define TF_NOPUSH 0x01000 /* enable TAO on listen */
#define TF_REQ_CC 0x02000 /* have/will request CC */
#define TF_RCVD_CC 0x04000 /* a CC was received in SYN */
#define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */
#define TF_MORETOCOME 0x10000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */
+#define TF_CORK 0x40000 /* don't push */
int t_force; /* 1 if forcing out a byte */
tcp_seq snd_una; /* send unacknowledged */
--------------------------------------------------------------------------------
This patch is against HEAD
Index: alpha/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/alpha/linux/linux.h,v
retrieving revision 1.49
diff -u -r1.49 linux.h
--- alpha/linux/linux.h 2000/12/03 01:30:30 1.49
+++ alpha/linux/linux.h 2001/02/08 21:59:43
@@ -379,6 +379,10 @@
#define LINUX_IP_ADD_MEMBERSHIP 35
#define LINUX_IP_DROP_MEMBERSHIP 36
+#define LINUX_TCP_NODELAY 1
+#define LINUX_TCP_MAXSEG 2
+#define LINUX_TCP_CORK 3
+
struct linux_sockaddr {
u_short sa_family;
char sa_data[14];
Index: compat/linux/linux_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/compat/linux/linux_socket.c,v
retrieving revision 1.25
diff -u -r1.25 linux_socket.c
--- compat/linux/linux_socket.c 2000/12/19 00:24:25 1.25
+++ compat/linux/linux_socket.c 2001/02/08 21:58:19
@@ -46,6 +46,7 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/tcp.h>
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
@@ -86,6 +87,21 @@
}
static int
+linux_to_bsd_tcp_sockopt(int opt)
+{
+
+ switch (opt) {
+ case LINUX_TCP_NODELAY:
+ return (TCP_NODELAY);
+ case LINUX_TCP_MAXSEG:
+ return (TCP_MAXSEG);
+ case LINUX_TCP_CORK:
+ return (TCP_CORK);
+ }
+ return (-1);
+}
+
+static int
linux_to_bsd_ip_sockopt(int opt)
{
@@ -831,8 +847,7 @@
name = linux_to_bsd_ip_sockopt(linux_args.optname);
break;
case IPPROTO_TCP:
- /* Linux TCP option values match BSD's */
- name = linux_args.optname;
+ name = linux_to_bsd_tcp_sockopt(linux_args.optname);
break;
default:
name = -1;
Index: i386/linux/linux.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/linux/linux.h,v
retrieving revision 1.51
diff -u -r1.51 linux.h
--- i386/linux/linux.h 2000/11/30 05:23:47 1.51
+++ i386/linux/linux.h 2001/02/08 22:00:07
@@ -503,6 +503,10 @@
#define LINUX_IP_ADD_MEMBERSHIP 35
#define LINUX_IP_DROP_MEMBERSHIP 36
+#define LINUX_TCP_NODELAY 1
+#define LINUX_TCP_MAXSEG 2
+#define LINUX_TCP_CORK 3
+
struct linux_sockaddr {
u_short sa_family;
char sa_data[14];
Index: netinet/tcp.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp.h,v
retrieving revision 1.16
diff -u -r1.16 tcp.h
--- netinet/tcp.h 2001/01/09 18:26:17 1.16
+++ netinet/tcp.h 2001/02/08 21:51:36
@@ -131,7 +131,8 @@
*/
#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
#define TCP_MAXSEG 0x02 /* set maximum segment size */
-#define TCP_NOPUSH 0x04 /* don't push last block of write */
+#define TCP_CORK 0x03 /* don't push last block of write */
+#define TCP_NOPUSH 0x04 /* ditto and also allow TAO on listen */
#define TCP_NOOPT 0x08 /* don't use TCP options */
#endif
Index: netinet/tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.122
diff -u -r1.122 tcp_input.c
--- netinet/tcp_input.c 2001/01/24 16:25:36 1.122
+++ netinet/tcp_input.c 2001/02/08 21:56:24
@@ -822,7 +822,7 @@
#endif
tp = intotcpcb(inp);
tp->t_state = TCPS_LISTEN;
- tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+ tp->t_flags |= tp0->t_flags & (TF_CORK|TF_NOOPT);
/* Compute proper scaling value from buffer space */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
Index: netinet/tcp_output.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.48
diff -u -r1.48 tcp_output.c
--- netinet/tcp_output.c 2000/10/27 11:45:41 1.48
+++ netinet/tcp_output.c 2001/02/08 21:56:04
@@ -282,7 +282,7 @@
goto send;
if (!(tp->t_flags & TF_MORETOCOME) &&
(idle || tp->t_flags & TF_NODELAY) &&
- (tp->t_flags & TF_NOPUSH) == 0 &&
+ (tp->t_flags & TF_CORK) == 0 &&
len + off >= so->so_snd.sb_cc)
goto send;
if (tp->t_force)
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.53
diff -u -r1.53 tcp_usrreq.c
--- netinet/tcp_usrreq.c 2001/02/02 18:48:25 1.53
+++ netinet/tcp_usrreq.c 2001/02/08 21:54:54
@@ -895,6 +895,8 @@
switch (sopt->sopt_name) {
case TCP_NODELAY:
case TCP_NOOPT:
+ case TCP_CORK:
+ case TCP_NOPUSH:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
@@ -907,6 +909,12 @@
case TCP_NOOPT:
opt = TF_NOOPT;
break;
+ case TCP_CORK:
+ opt = TF_CORK;
+ break;
+ case TCP_NOPUSH:
+ opt = TF_NOPUSH | TF_CORK;
+ break;
default:
opt = 0; /* dead code to fool gcc */
break;
@@ -914,21 +922,10 @@
if (optval)
tp->t_flags |= opt;
- else
- tp->t_flags &= ~opt;
- break;
-
- case TCP_NOPUSH:
- error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
- if (error)
- break;
-
- if (optval)
- tp->t_flags |= TF_NOPUSH;
else {
- tp->t_flags &= ~TF_NOPUSH;
- error = tcp_output(tp);
+ tp->t_flags &= ~opt;
+ if (opt & TF_CORK)
+ error = tcp_output(tp);
}
break;
Index: netinet/tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.63
diff -u -r1.63 tcp_var.h
--- netinet/tcp_var.h 2000/12/24 10:57:21 1.63
+++ netinet/tcp_var.h 2001/02/08 21:54:10
@@ -89,12 +89,13 @@
#define TF_SACK_PERMIT 0x00200 /* other side said I could SACK */
#define TF_NEEDSYN 0x00400 /* send SYN (implicit state) */
#define TF_NEEDFIN 0x00800 /* send FIN (implicit state) */
-#define TF_NOPUSH 0x01000 /* don't push */
+#define TF_NOPUSH 0x01000 /* enable TAO on listen */
#define TF_REQ_CC 0x02000 /* have/will request CC */
#define TF_RCVD_CC 0x04000 /* a CC was received in SYN */
#define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */
#define TF_MORETOCOME 0x10000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */
+#define TF_CORK 0x40000 /* don't push */
int t_force; /* 1 if forcing out a byte */
tcp_seq snd_una; /* send unacknowledged */
--------------------------------------------------------------------------------
>Release-Note:
>Audit-Trail:
>Unformatted:
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?E14QzXB-00024t-00>
