Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 8 Mar 1999 21:57:44 -0500 (EST)
From:      Garrett Wollman <wollman@khavrinen.lcs.mit.edu>
To:        dg@root.com, net@freebsd.org
Subject:   TCP Timer scaleability (with patch)
Message-ID:  <199903090257.VAA21463@khavrinen.lcs.mit.edu>

next in thread | raw e-mail | index | archive | help
One of the problems with the BSD-style TCP is that, for a machine with
large numbers of outstanding TCP connections, TCP timer processing can
take a significant amount of overhead, and -- what's worse -- often
results in an ``avalanche'' of outgoing traffic as many connections
trigger some sort of response at the same time.

The following patch eliminates much of this behavior.  Note that this
is still very, very preliminary, but it does look promising -- at
least, the TCP connection I'm using right now to type this message
is operational.

There is still one big chunk missing in this patch, which is the
conversion of the RTT estimator and related clunkery to measure in
timer ticks rather than slow-timer ticks.  Once this is done, it
should eliminate all the locations where slow run-time multiplies and
divides are done in the patch below.  I expect this to have a
significant beneficial effect on large servers like wcarchive; for
smaller machines, it's probably a bit of a wash: slightly slower TCP
over fast links versus much faster retransmission over lossy links.

The other thing I'm not quite sure about is whether I've found all of
the right locations to set t_starttime (the inverse of t_duration in
-current TCP).  I think it should be set whenever the connection hits
or crosses ESTABLISHED, and left zero otherwise, but there may be
other places where it is needed.  (The old code would increment
t_duration in any state other than LISTEN, but I don't think that's
right -- some of the T/TCP code, for example, cared about the actual
duration, and not how long this particular socket was sitting idle
before the connection began.)

-GAWollman

------------------------------------
? tcp.patch
? sys/compile/GENERIC
? sys/compile/FORLENNH
Index: sys/net/if_vlan.c
===================================================================
RCS file: /home/cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.4
diff -u -r1.4 if_vlan.c
--- if_vlan.c	1998/12/04 22:54:52	1.4
+++ if_vlan.c	1999/03/09 02:09:58
@@ -48,7 +48,9 @@
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/linker_set.h>
 #include <sys/mbuf.h>
+#include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
@@ -69,6 +71,7 @@
 #include <netinet/if_ether.h>
 #endif
 
+SYSCTL_DECL(_net_link);		/* XXX */
 SYSCTL_NODE(_net_link, IFT_8021_VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN");
 SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency");
 
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.84
diff -u -r1.84 tcp_input.c
--- tcp_input.c	1999/02/06 00:47:45	1.84
+++ tcp_input.c	1999/03/09 02:10:08
@@ -491,9 +491,9 @@
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 */
-	tp->t_idle = 0;
+	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
-		tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
 
 	/*
 	 * Process options if not in LISTEN state,
@@ -559,9 +559,11 @@
 				if ((to.to_flag & TOF_TS) != 0)
 					tcp_xmit_timer(tp,
 					    tcp_now - to.to_tsecr + 1);
-				else if (tp->t_rtt &&
+				else if (tp->t_rtttime &&
 					    SEQ_GT(ti->ti_ack, tp->t_rtseq))
-					tcp_xmit_timer(tp, tp->t_rtt);
+					tcp_xmit_timer(tp, 1 + 
+						       TCPT_SLOWHZ(ticks
+						           - tp->t_rtttime));
 				acked = ti->ti_ack - tp->snd_una;
 				tcpstat.tcps_rcvackpack++;
 				tcpstat.tcps_rcvackbyte += acked;
@@ -579,9 +581,11 @@
 				 * decide between more output or persist.
 				 */
 				if (tp->snd_una == tp->snd_max)
-					tp->t_timer[TCPT_REXMT] = 0;
-				else if (tp->t_timer[TCPT_PERSIST] == 0)
-					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+					callout_stop(tp->tt_rexmt);
+				else if (!callout_pending(tp->tt_persist))
+					callout_reset(tp->tt_rexmt, 
+						      tp->t_rxtcur,
+						      tcp_timer_rexmt, tp);
 
 				sowwakeup(so);
 				if (so->so_snd.sb_cc)
@@ -728,6 +732,7 @@
 			taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
 
 			taop->tao_cc = to.to_cc;
+			tp->t_starttime = ticks;
 			tp->t_state = TCPS_ESTABLISHED;
 
 			/*
@@ -751,7 +756,8 @@
 			tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
 			tcpstat.tcps_connects++;
 			soisconnected(so);
-			tp->t_timer[TCPT_KEEP] = tcp_keepinit;
+			callout_reset(tp->tt_keep, tcp_keepinit,
+				      tcp_timer_keep, tp);
 			dropsocket = 0;		/* committed to socket */
 			tcpstat.tcps_accepts++;
 			goto trimthenstep6;
@@ -770,7 +776,7 @@
 		 */
 		tp->t_flags |= TF_ACKNOW;
 		tp->t_state = TCPS_SYN_RECEIVED;
-		tp->t_timer[TCPT_KEEP] = tcp_keepinit;
+		callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
 		dropsocket = 0;		/* committed to socket */
 		tcpstat.tcps_accepts++;
 		goto trimthenstep6;
@@ -878,13 +884,15 @@
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
+			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tp->t_state = TCPS_FIN_WAIT_1;
 				tp->t_flags &= ~TF_NEEDFIN;
 				tiflags &= ~TH_SYN;
 			} else {
 				tp->t_state = TCPS_ESTABLISHED;
-				tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+				callout_reset(tp->tt_keep, tcp_keepidle,
+					      tcp_timer_keep, tp);
 			}
 		} else {
 		/*
@@ -897,7 +905,7 @@
 		 *  If there was no CC option, clear cached CC value.
 		 */
 			tp->t_flags |= TF_ACKNOW;
-			tp->t_timer[TCPT_REXMT] = 0;
+			callout_stop(tp->tt_rexmt);
 			if (to.to_flag & TOF_CC) {
 				if (taop->tao_cc != 0 &&
 				    CC_GT(to.to_cc, taop->tao_cc)) {
@@ -907,12 +915,16 @@
 					 *        SYN-SENT* -> FIN-WAIT-1*
 					 */
 					taop->tao_cc = to.to_cc;
+					tp->t_starttime = ticks;
 					if (tp->t_flags & TF_NEEDFIN) {
 						tp->t_state = TCPS_FIN_WAIT_1;
 						tp->t_flags &= ~TF_NEEDFIN;
 					} else {
 						tp->t_state = TCPS_ESTABLISHED;
-						tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+						callout_reset(tp->tt_keep,
+							      tcp_keepidle,
+							      tcp_timer_keep,
+							      tp);
 					}
 					tp->t_flags |= TF_NEEDSYN;
 				} else
@@ -971,7 +983,7 @@
 		if ((tiflags & TH_SYN) &&
 		    (to.to_flag & TOF_CC) && tp->cc_recv != 0) {
 			if (tp->t_state == TCPS_TIME_WAIT &&
-					tp->t_duration > TCPTV_MSL)
+					(ticks - tp->t_starttime) > TCPTV_MSL)
 				goto dropwithreset;
 			if (CC_GT(to.to_cc, tp->cc_recv)) {
 				tp = tcp_close(tp);
@@ -1287,12 +1299,14 @@
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
+		tp->t_starttime = ticks;
 		if (tp->t_flags & TF_NEEDFIN) {
 			tp->t_state = TCPS_FIN_WAIT_1;
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tp->t_state = TCPS_ESTABLISHED;
-			tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+			callout_reset(tp->tt_keep, tcp_keepidle, 
+				      tcp_timer_keep, tp);
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
@@ -1347,7 +1361,7 @@
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 */
-				if (tp->t_timer[TCPT_REXMT] == 0 ||
+				if (!callout_pending(tp->tt_rexmt) ||
 				    ti->ti_ack != tp->snd_una)
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks == tcprexmtthresh) {
@@ -1359,8 +1373,8 @@
 					if (win < 2)
 						win = 2;
 					tp->snd_ssthresh = win * tp->t_maxseg;
-					tp->t_timer[TCPT_REXMT] = 0;
-					tp->t_rtt = 0;
+					callout_stop(tp->tt_rexmt);
+					tp->t_rtttime = 0;
 					tp->snd_nxt = ti->ti_ack;
 					tp->snd_cwnd = tp->t_maxseg;
 					(void) tcp_output(tp);
@@ -1428,8 +1442,9 @@
 		 */
 		if (to.to_flag & TOF_TS)
 			tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
-		else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
-			tcp_xmit_timer(tp,tp->t_rtt);
+		else if (tp->t_rtttime && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+			tcp_xmit_timer(tp, 
+				       1 + TCPT_SLOWHZ(ticks - tp->t_rtttime));
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
@@ -1438,10 +1453,11 @@
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (ti->ti_ack == tp->snd_max) {
-			tp->t_timer[TCPT_REXMT] = 0;
+			callout_stop(tp->tt_rexmt);
 			needoutput = 1;
-		} else if (tp->t_timer[TCPT_PERSIST] == 0)
-			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+		} else if (!callout_pending(tp->tt_persist))
+			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+				      tcp_timer_rexmt, tp);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
@@ -1497,7 +1513,8 @@
 				 */
 				if (so->so_state & SS_CANTRCVMORE) {
 					soisdisconnected(so);
-					tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+					callout_reset(tp->tt_2msl, tcp_maxidle,
+						      tcp_timer_2msl, tp);
 				}
 				tp->t_state = TCPS_FIN_WAIT_2;
 			}
@@ -1515,11 +1532,14 @@
 				tcp_canceltimers(tp);
 				/* Shorten TIME_WAIT [RFC-1644, p.28] */
 				if (tp->cc_recv != 0 &&
-				    tp->t_duration < TCPTV_MSL)
-					tp->t_timer[TCPT_2MSL] =
-					    tp->t_rxtcur * TCPTV_TWTRUNC;
+				    (ticks - tp->t_starttime) < TCPTV_MSL)
+					callout_reset(tp->tt_2msl,
+						      tp->t_rxtcur *
+						      TCPTV_TWTRUNC,
+						      tcp_timer_2msl, tp);
 				else
-					tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+					callout_reset(tp->tt_2msl, 2*TCPTV_MSL,
+						      tcp_timer_2msl, tp);
 				soisdisconnected(so);
 			}
 			break;
@@ -1543,7 +1563,8 @@
 		 * it and restart the finack timer.
 		 */
 		case TCPS_TIME_WAIT:
-			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			callout_reset(tp->tt_2msl, 2 * TCPTV_MSL,
+				      tcp_timer_2msl, tp);
 			goto dropafterack;
 		}
 	}
@@ -1679,6 +1700,7 @@
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
+			tp->t_starttime = ticks;
 		case TCPS_ESTABLISHED:
 			tp->t_state = TCPS_CLOSE_WAIT;
 			break;
@@ -1701,14 +1723,16 @@
 			tcp_canceltimers(tp);
 			/* Shorten TIME_WAIT [RFC-1644, p.28] */
 			if (tp->cc_recv != 0 &&
-			    tp->t_duration < TCPTV_MSL) {
-				tp->t_timer[TCPT_2MSL] =
-				    tp->t_rxtcur * TCPTV_TWTRUNC;
+			    (ticks - tp->t_starttime) < TCPTV_MSL) {
+				callout_reset(tp->tt_2msl,
+					      tp->t_rxtcur * TCPTV_TWTRUNC,
+					      tcp_timer_2msl, tp);
 				/* For transaction client, force ACK now. */
 				tp->t_flags |= TF_ACKNOW;
 			}
 			else
-				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+				callout_reset(tp->tt_2msl, 2 * TCPTV_MSL,
+					      tcp_timer_2msl, tp);
 			soisdisconnected(so);
 			break;
 
@@ -1716,7 +1740,8 @@
 		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
 		 */
 		case TCPS_TIME_WAIT:
-			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			callout_reset(tp->tt_2msl, 2 * TCPTV_MSL,
+				      tcp_timer_2msl, tp);
 			break;
 		}
 	}
@@ -1997,7 +2022,7 @@
 		tp->t_srtt = rtt << TCP_RTT_SHIFT;
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
 	}
-	tp->t_rtt = 0;
+	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
 
 	/*
@@ -2011,8 +2036,12 @@
 	 * statistical, we have to test that we don't drop below
 	 * the minimum feasible timer (which is 2 ticks).
 	 */
-	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
-		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+	{
+		int newrxt;
+		TCPT_RANGESET(newrxt, TCP_REXMTVAL(tp),
+			      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+		tp->t_rxtcur = TCPT_TICKS(newrxt);
+	}
 
 	/*
 	 * We received an ack for a packet that wasn't retransmitted;
@@ -2119,10 +2148,14 @@
 			/* default variation is +- 1 rtt */
 			tp->t_rttvar =
 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		}
+		{
+			int newrxt;
+			TCPT_RANGESET(newrxt,
+				      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+				      tp->t_rttmin, TCPTV_REXMTMAX);
+			tp->t_rxtcur = TCPT_TICKS(newrxt);
 		}
-		TCPT_RANGESET(tp->t_rxtcur,
-		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-		    tp->t_rttmin, TCPTV_REXMTMAX);
 	}
 	/*
 	 * if there's an mtu associated with the route, use it
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.32
diff -u -r1.32 tcp_output.c
--- tcp_output.c	1999/01/20 17:31:59	1.32
+++ tcp_output.c	1999/03/09 02:10:11
@@ -40,6 +40,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
@@ -93,7 +94,7 @@
 	 * to send, then transmit; otherwise, investigate further.
 	 */
 	idle = (tp->snd_max == tp->snd_una);
-	if (idle && tp->t_idle >= tp->t_rxtcur)
+	if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur)
 		/*
 		 * We have been idle for "a while" and no acks are
 		 * expected to clock out any data we send --
@@ -143,7 +144,7 @@
 				flags &= ~TH_FIN;
 			win = 1;
 		} else {
-			tp->t_timer[TCPT_PERSIST] = 0;
+			callout_stop(tp->tt_persist);
 			tp->t_rxtshift = 0;
 		}
 	}
@@ -194,10 +195,10 @@
 		 */
 		len = 0;
 		if (win == 0) {
-			tp->t_timer[TCPT_REXMT] = 0;
+			callout_stop(tp->tt_rexmt);
 			tp->t_rxtshift = 0;
 			tp->snd_nxt = tp->snd_una;
-			if (tp->t_timer[TCPT_PERSIST] == 0)
+			if (!callout_pending(tp->tt_persist))
 				tcp_setpersist(tp);
 		}
 	}
@@ -285,11 +286,11 @@
 	 *	persisting		to move a small or zero window
 	 *	(re)transmitting	and thereby not persisting
 	 *
-	 * tp->t_timer[TCPT_PERSIST]
-	 *	is set when we are in persist state.
+	 * callout_pending(tp->tt_persist)
+	 *	is true when we are in persist state.
 	 * tp->t_force
 	 *	is set when we are called to send a persist packet.
-	 * tp->t_timer[TCPT_REXMT]
+	 * callout_pending(tp->tt_rexmt)
 	 *	is set when we are retransmitting
 	 * The output side is idle when both timers are zero.
 	 *
@@ -299,8 +300,8 @@
 	 * if window is nonzero, transmit what we can,
 	 * otherwise force out a byte.
 	 */
-	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
-	    tp->t_timer[TCPT_PERSIST] == 0) {
+	if (so->so_snd.sb_cc && !callout_pending(tp->tt_rexmt) &&
+	    !callout_pending(tp->tt_persist)) {
 		tp->t_rxtshift = 0;
 		tcp_setpersist(tp);
 	}
@@ -563,7 +564,8 @@
 	 * case, since we know we aren't doing a retransmission.
 	 * (retransmit and persist are mutually exclusive...)
 	 */
-	if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
+	if (len || (flags & (TH_SYN|TH_FIN)) 
+	    || callout_pending(tp->tt_persist))
 		ti->ti_seq = htonl(tp->snd_nxt);
 	else
 		ti->ti_seq = htonl(tp->snd_max);
@@ -609,7 +611,7 @@
 	 * In transmit state, time the transmission and arrange for
 	 * the retransmit.  In persist state, just set snd_max.
 	 */
-	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
+	if (tp->t_force == 0 || !callout_pending(tp->tt_persist)) {
 		tcp_seq startseq = tp->snd_nxt;
 
 		/*
@@ -630,8 +632,8 @@
 			 * Time this transmission if not a retransmission and
 			 * not currently timing anything.
 			 */
-			if (tp->t_rtt == 0) {
-				tp->t_rtt = 1;
+			if (tp->t_rtttime == 0) {
+				tp->t_rtttime = ticks;
 				tp->t_rtseq = startseq;
 				tcpstat.tcps_segstimed++;
 			}
@@ -645,11 +647,12 @@
 		 * Initialize shift counter which is used for backoff
 		 * of retransmit time.
 		 */
-		if (tp->t_timer[TCPT_REXMT] == 0 &&
+		if (!callout_pending(tp->tt_rexmt) &&
 		    tp->snd_nxt != tp->snd_una) {
-			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
-			if (tp->t_timer[TCPT_PERSIST]) {
-				tp->t_timer[TCPT_PERSIST] = 0;
+			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+				      tcp_timer_rexmt, tp);
+			if (callout_pending(tp->tt_persist)) {
+				callout_stop(tp->tt_persist);
 				tp->t_rxtshift = 0;
 			}
 		}
@@ -742,16 +745,17 @@
 tcp_setpersist(tp)
 	register struct tcpcb *tp;
 {
-	register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+	int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+	int tt;
 
-	if (tp->t_timer[TCPT_REXMT])
-		panic("tcp_output REXMT");
+	if (callout_pending(tp->tt_rexmt))
+		panic("tcp_setpersist: retransmit pending");
 	/*
 	 * Start/restart persistance timer.
 	 */
-	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
-	    t * tcp_backoff[tp->t_rxtshift],
-	    TCPTV_PERSMIN, TCPTV_PERSMAX);
+	TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
+		      TCPTV_PERSMIN, TCPTV_PERSMAX);
+	callout_reset(tp->tt_persist, TCPT_TICKS(tt), tcp_timer_persist, tp);
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 		tp->t_rxtshift++;
 }
Index: sys/netinet/tcp_subr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.52
diff -u -r1.52 tcp_subr.c
--- tcp_subr.c	1999/02/04 03:27:43	1.52
+++ tcp_subr.c	1999/03/09 02:10:14
@@ -39,13 +39,14 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/callout.h>
 #include <sys/kernel.h>
-#include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-#include <sys/protosw.h>
+#include <sys/sysctl.h>
 
 #include <vm/vm_zone.h>
 
@@ -117,6 +118,7 @@
 		char	align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
 	} inp_tp_u;
 	struct	tcpcb tcb;
+	struct	callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
 };
 #undef ALIGNMENT
 #undef ALIGNM1
@@ -132,6 +134,12 @@
 	tcp_iss = random();	/* wrong, but better than a constant */
 	tcp_ccgen = 1;
 	tcp_cleartaocache();
+
+	tcp_keepinit = TCPTV_KEEP_INIT;
+	tcp_keepidle = TCPTV_KEEP_IDLE;
+	tcp_keepintvl = TCPTV_KEEPINTVL;
+	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
+
 	LIST_INIT(&tcb);
 	tcbinfo.listhead = &tcb;
 	if (!(getenv_int("net.inet.tcp.tcbhashsize", &hashsize)))
@@ -145,6 +153,7 @@
 					&tcbinfo.porthashmask);
 	tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
 				 ZONE_INTERRUPT, 0);
+
 	if (max_protohdr < sizeof(struct tcpiphdr))
 		max_protohdr = sizeof(struct tcpiphdr);
 	if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
@@ -299,6 +308,12 @@
 	tp->t_segq = NULL;
 	tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
 
+	/* Set up our timeouts. */
+	callout_init(tp->tt_rexmt = &it->inp_tp_rexmt);
+	callout_init(tp->tt_persist = &it->inp_tp_persist);
+	callout_init(tp->tt_keep = &it->inp_tp_keep);
+	callout_init(tp->tt_2msl = &it->inp_tp_2msl);
+
 	if (tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (tcp_do_rfc1644)
@@ -312,7 +327,7 @@
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = TCPTV_MIN;
-	tp->t_rxtcur = TCPTV_RTOBASE;
+	tp->t_rxtcur = TCPT_TICKS(TCPTV_RTOBASE);
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	inp->inp_ip_ttl = ip_defttl;
@@ -362,6 +377,15 @@
 	int dosavessthresh;
 
 	/*
+	 * Make sure that all of our timers are stopped before we
+	 * delete the PCB.
+	 */
+	callout_stop(tp->tt_rexmt);
+	callout_stop(tp->tt_persist);
+	callout_stop(tp->tt_keep);
+	callout_stop(tp->tt_2msl);
+
+	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
 	 * 'Enough' is arbitrarily defined as the 16 samples.
@@ -693,7 +717,7 @@
 		tp->t_maxseg = mss;
 
 		tcpstat.tcps_mturesent++;
-		tp->t_rtt = 0;
+		tp->t_rtttime = 0;
 		tp->snd_nxt = tp->snd_una;
 		tcp_output(tp);
 	}
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.28
diff -u -r1.28 tcp_timer.c
--- tcp_timer.c	1998/04/24 09:25:35	1.28
+++ tcp_timer.c	1999/03/09 02:10:15
@@ -63,15 +63,15 @@
 #include <netinet/tcp_debug.h>
 #endif
 
-int	tcp_keepinit = TCPTV_KEEP_INIT;
+int	tcp_keepinit;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit,
 	CTLFLAG_RW, &tcp_keepinit , 0, "");
 
-int	tcp_keepidle = TCPTV_KEEP_IDLE;
+int	tcp_keepidle;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle,
 	CTLFLAG_RW, &tcp_keepidle , 0, "");
 
-static int	tcp_keepintvl = TCPTV_KEEPINTVL;
+int	tcp_keepintvl;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl,
 	CTLFLAG_RW, &tcp_keepintvl , 0, "");
 
@@ -81,7 +81,7 @@
 
 static int	tcp_keepcnt = TCPTV_KEEPCNT;
 	/* max idle probes */
-static int	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
+int	tcp_maxpersistidle;
 	/* max idle time in persist */
 int	tcp_maxidle;
 
@@ -120,7 +120,6 @@
 {
 	register struct inpcb *ip, *ipnxt;
 	register struct tcpcb *tp;
-	register int i;
 	int s;
 #ifdef TCPDEBUG
 	int ostate;
@@ -143,29 +142,6 @@
 		tp = intotcpcb(ip);
 		if (tp == 0 || tp->t_state == TCPS_LISTEN)
 			continue;
-		for (i = 0; i < TCPT_NTIMERS; i++) {
-			if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
-#ifdef TCPDEBUG
-				ostate = tp->t_state;
-#endif
-				tp = tcp_timers(tp, i);
-				if (tp == NULL)
-					goto tpgone;
-#ifdef TCPDEBUG
-				if (tp->t_inpcb->inp_socket->so_options
-				    & SO_DEBUG)
-					tcp_trace(TA_USER, ostate, tp,
-						  (struct tcpiphdr *)0,
-						  PRU_SLOWTIMO);
-#endif
-			}
-		}
-		tp->t_idle++;
-		tp->t_duration++;
-		if (tp->t_rtt)
-			tp->t_rtt++;
-tpgone:
-		;
 	}
 	tcp_iss += TCP_ISSINCR/PR_SLOWHZ;		/* increment iss */
 #ifdef TCP_COMPAT_42
@@ -183,10 +159,10 @@
 tcp_canceltimers(tp)
 	struct tcpcb *tp;
 {
-	register int i;
-
-	for (i = 0; i < TCPT_NTIMERS; i++)
-		tp->t_timer[i] = 0;
+	callout_stop(tp->tt_2msl);
+	callout_stop(tp->tt_persist);
+	callout_stop(tp->tt_keep);
+	callout_stop(tp->tt_rexmt);
 }
 
 int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
@@ -197,175 +173,242 @@
 /*
  * TCP timer processing.
  */
-struct tcpcb *
-tcp_timers(tp, timer)
-	register struct tcpcb *tp;
-	int timer;
-{
-	register int rexmt;
 
-	switch (timer) {
+void
+tcp_timer_2msl(xtp)
+	void *xtp;
+{
+	struct tcpcb *tp = xtp;
+#ifdef TCPDEBUG
+	int ostate;
 
+	ostate = tp->t_state;
+#endif
 	/*
 	 * 2 MSL timeout in shutdown went off.  If we're closed but
 	 * still waiting for peer to close and connection has been idle
 	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
 	 * control block.  Otherwise, check again in a bit.
 	 */
-	case TCPT_2MSL:
-		if (tp->t_state != TCPS_TIME_WAIT &&
-		    tp->t_idle <= tcp_maxidle)
-			tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
-		else
-			tp = tcp_close(tp);
-		break;
+	if (tp->t_state != TCPS_TIME_WAIT &&
+	    (ticks - tp->t_rcvtime) <= tcp_maxidle)
+		callout_reset(tp->tt_2msl, tcp_keepintvl,
+			      tcp_timer_2msl, tp);
+	else
+		tp = tcp_close(tp);
 
+#ifdef TCPDEBUG
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+}
+
+void
+tcp_timer_keep(xtp)
+	void *xtp;
+{
+	struct tcpcb *tp = xtp;
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;
+#endif
 	/*
-	 * Retransmission timer went off.  Message has not
-	 * been acked within retransmit interval.  Back off
-	 * to a longer retransmit interval and retransmit one segment.
+	 * Keep-alive timer went off; send something
+	 * or drop connection if idle for too long.
 	 */
-	case TCPT_REXMT:
-		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
-			tp->t_rxtshift = TCP_MAXRXTSHIFT;
-			tcpstat.tcps_timeoutdrop++;
-			tp = tcp_drop(tp, tp->t_softerror ?
-			    tp->t_softerror : ETIMEDOUT);
-			break;
-		}
-		tcpstat.tcps_rexmttimeo++;
-		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
-		TCPT_RANGESET(tp->t_rxtcur, rexmt,
-		    tp->t_rttmin, TCPTV_REXMTMAX);
-		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
-		/*
-		 * If losing, let the lower level know and try for
-		 * a better route.  Also, if we backed off this far,
-		 * our srtt estimate is probably bogus.  Clobber it
-		 * so we'll take the next rtt measurement as our srtt;
-		 * move the current srtt into rttvar to keep the current
-		 * retransmit times until then.
-		 */
-		if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
-			in_losing(tp->t_inpcb);
-			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
-			tp->t_srtt = 0;
-		}
-		tp->snd_nxt = tp->snd_una;
-		/*
-		 * Force a segment to be sent.
-		 */
-		tp->t_flags |= TF_ACKNOW;
+	tcpstat.tcps_keeptimeo++;
+	if (tp->t_state < TCPS_ESTABLISHED)
+		goto dropit;
+	if ((always_keepalive ||
+	     tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
+	    tp->t_state <= TCPS_CLOSING) {
+		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
+			goto dropit;
 		/*
-		 * If timing a segment in this window, stop the timer.
+		 * Send a packet designed to force a response
+		 * if the peer is up and reachable:
+		 * either an ACK if the connection is still alive,
+		 * or an RST if the peer has closed the connection
+		 * due to timeout or reboot.
+		 * Using sequence number tp->snd_una-1
+		 * causes the transmitted zero-length segment
+		 * to lie outside the receive window;
+		 * by the protocol spec, this requires the
+		 * correspondent TCP to respond.
 		 */
-		tp->t_rtt = 0;
+		tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
 		/*
-		 * Close the congestion window down to one segment
-		 * (we'll open it by one segment for each ack we get).
-		 * Since we probably have a window's worth of unacked
-		 * data accumulated, this "slow start" keeps us from
-		 * dumping all that data as back-to-back packets (which
-		 * might overwhelm an intermediate gateway).
-		 *
-		 * There are two phases to the opening: Initially we
-		 * open by one mss on each ack.  This makes the window
-		 * size increase exponentially with time.  If the
-		 * window is larger than the path can handle, this
-		 * exponential growth results in dropped packet(s)
-		 * almost immediately.  To get more time between
-		 * drops but still "push" the network to take advantage
-		 * of improving conditions, we switch from exponential
-		 * to linear window opening at some threshhold size.
-		 * For a threshhold, we use half the current window
-		 * size, truncated to a multiple of the mss.
-		 *
-		 * (the minimum cwnd that will give us exponential
-		 * growth is 2 mss.  We don't allow the threshhold
-		 * to go below this.)
+		 * The keepalive packet must have nonzero length
+		 * to get a 4.2 host to respond.
 		 */
-		{
-		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
-		if (win < 2)
-			win = 2;
-		tp->snd_cwnd = tp->t_maxseg;
-		tp->snd_ssthresh = win * tp->t_maxseg;
-		tp->t_dupacks = 0;
-		}
-		(void) tcp_output(tp);
-		break;
+		tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+			    tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+		tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+			    tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+		callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
+	} else
+		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+
+#ifdef TCPDEBUG
+	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+	return;
+
+dropit:
+	tcpstat.tcps_keepdrops++;
+	tp = tcp_drop(tp, ETIMEDOUT);
 
+#ifdef TCPDEBUG
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+}
+
+void
+tcp_timer_persist(xtp)
+	void *xtp;
+{
+	struct tcpcb *tp = xtp;
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;
+#endif
+
 	/*
 	 * Persistance timer into zero window.
 	 * Force a byte to be output, if possible.
 	 */
-	case TCPT_PERSIST:
-		tcpstat.tcps_persisttimeo++;
-		/*
-		 * Hack: if the peer is dead/unreachable, we do not
-		 * time out if the window is closed.  After a full
-		 * backoff, drop the connection if the idle time
-		 * (no responses to probes) reaches the maximum
-		 * backoff that we would use if retransmitting.
-		 */
-		if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
-		    (tp->t_idle >= tcp_maxpersistidle ||
-		    tp->t_idle >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
-			tcpstat.tcps_persistdrop++;
-			tp = tcp_drop(tp, ETIMEDOUT);
-			break;
-		}
-		tcp_setpersist(tp);
-		tp->t_force = 1;
-		(void) tcp_output(tp);
-		tp->t_force = 0;
-		break;
-
+	tcpstat.tcps_persisttimeo++;
 	/*
-	 * Keep-alive timer went off; send something
-	 * or drop connection if idle for too long.
+	 * Hack: if the peer is dead/unreachable, we do not
+	 * time out if the window is closed.  After a full
+	 * backoff, drop the connection if the idle time
+	 * (no responses to probes) reaches the maximum
+	 * backoff that we would use if retransmitting.
 	 */
-	case TCPT_KEEP:
-		tcpstat.tcps_keeptimeo++;
-		if (tp->t_state < TCPS_ESTABLISHED)
-			goto dropit;
-		if ((always_keepalive ||
-		    tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
-		    tp->t_state <= TCPS_CLOSING) {
-		    	if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
-				goto dropit;
-			/*
-			 * Send a packet designed to force a response
-			 * if the peer is up and reachable:
-			 * either an ACK if the connection is still alive,
-			 * or an RST if the peer has closed the connection
-			 * due to timeout or reboot.
-			 * Using sequence number tp->snd_una-1
-			 * causes the transmitted zero-length segment
-			 * to lie outside the receive window;
-			 * by the protocol spec, this requires the
-			 * correspondent TCP to respond.
-			 */
-			tcpstat.tcps_keepprobe++;
-#ifdef TCP_COMPAT_42
-			/*
-			 * The keepalive packet must have nonzero length
-			 * to get a 4.2 host to respond.
-			 */
-			tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
-			    tp->rcv_nxt - 1, tp->snd_una - 1, 0);
-#else
-			tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
-			    tp->rcv_nxt, tp->snd_una - 1, 0);
-#endif
-			tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
-		} else
-			tp->t_timer[TCPT_KEEP] = tcp_keepidle;
-		break;
-	dropit:
-		tcpstat.tcps_keepdrops++;
+	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
+	     (ticks - tp->t_rcvtime)
+	     >= TCPT_TICKS(TCP_REXMTVAL(tp) * tcp_totbackoff))) {
+		tcpstat.tcps_persistdrop++;
 		tp = tcp_drop(tp, ETIMEDOUT);
-		break;
+		goto out;
+	}
+	tcp_setpersist(tp);
+	tp->t_force = 1;
+	(void) tcp_output(tp);
+	tp->t_force = 0;
+
+out:
+#ifdef TCPDEBUG
+	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+}
+
+void
+tcp_timer_rexmt(xtp)
+	void *xtp;
+{
+	struct tcpcb *tp = xtp;
+	int rexmt;
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;
+#endif
+
+	/*
+	 * Retransmission timer went off.  Message has not
+	 * been acked within retransmit interval.  Back off
+	 * to a longer retransmit interval and retransmit one segment.
+	 */
+	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+		tp->t_rxtshift = TCP_MAXRXTSHIFT;
+		tcpstat.tcps_timeoutdrop++;
+		tp = tcp_drop(tp, tp->t_softerror ?
+			      tp->t_softerror : ETIMEDOUT);
+		goto out;
+	}
+	tcpstat.tcps_rexmttimeo++;
+	rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+	{
+		int newrxt;
+		TCPT_RANGESET(newrxt, rexmt,
+			      tp->t_rttmin, TCPTV_REXMTMAX);
+		tp->t_rxtcur = TCPT_TICKS(newrxt);
+		callout_reset(tp->tt_rexmt, tp->t_rxtcur, tcp_timer_rexmt, tp);
+	}
+	/*
+	 * If losing, let the lower level know and try for
+	 * a better route.  Also, if we backed off this far,
+	 * our srtt estimate is probably bogus.  Clobber it
+	 * so we'll take the next rtt measurement as our srtt;
+	 * move the current srtt into rttvar to keep the current
+	 * retransmit times until then.
+	 */
+	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+		in_losing(tp->t_inpcb);
+		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+		tp->t_srtt = 0;
+	}
+	tp->snd_nxt = tp->snd_una;
+	/*
+	 * Force a segment to be sent.
+	 */
+	tp->t_flags |= TF_ACKNOW;
+	/*
+	 * If timing a segment in this window, stop the timer.
+	 */
+	tp->t_rtttime = 0;
+	/*
+	 * Close the congestion window down to one segment
+	 * (we'll open it by one segment for each ack we get).
+	 * Since we probably have a window's worth of unacked
+	 * data accumulated, this "slow start" keeps us from
+	 * dumping all that data as back-to-back packets (which
+	 * might overwhelm an intermediate gateway).
+	 *
+	 * There are two phases to the opening: Initially we
+	 * open by one mss on each ack.  This makes the window
+	 * size increase exponentially with time.  If the
+	 * window is larger than the path can handle, this
+	 * exponential growth results in dropped packet(s)
+	 * almost immediately.  To get more time between
+	 * drops but still "push" the network to take advantage
+	 * of improving conditions, we switch from exponential
+	 * to linear window opening at some threshhold size.
+	 * For a threshhold, we use half the current window
+	 * size, truncated to a multiple of the mss.
+	 *
+	 * (the minimum cwnd that will give us exponential
+	 * growth is 2 mss.  We don't allow the threshhold
+	 * to go below this.)
+	 */
+	{
+		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+		if (win < 2)
+			win = 2;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_ssthresh = win * tp->t_maxseg;
+		tp->t_dupacks = 0;
 	}
-	return (tp);
+	(void) tcp_output(tp);
+
+out:
+#ifdef TCPDEBUG
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
 }
Index: sys/netinet/tcp_timer.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.13
diff -u -r1.13 tcp_timer.h
--- tcp_timer.h	1997/09/07 05:26:48	1.13
+++ tcp_timer.h	1999/03/09 02:10:16
@@ -38,17 +38,6 @@
 #define _NETINET_TCP_TIMER_H_
 
 /*
- * Definitions of the TCP timers.  These timers are counted
- * down PR_SLOWHZ times a second.
- */
-#define	TCPT_NTIMERS	4
-
-#define	TCPT_REXMT	0		/* retransmit */
-#define	TCPT_PERSIST	1		/* retransmit persistence */
-#define	TCPT_KEEP	2		/* keep alive */
-#define	TCPT_2MSL	3		/* 2*msl quiet time timer */
-
-/*
  * The TCPT_REXMT timer is used to force retransmissions.
  * The TCP has the TCPT_REXMT timer set whenever segments
  * have been sent for which ACKs are expected but not yet
@@ -87,7 +76,7 @@
 /*
  * Time constants.
  */
-#define	TCPTV_MSL	( 30*PR_SLOWHZ)		/* max seg lifetime (hah!) */
+#define	TCPTV_MSL	( 30*hz)		/* max seg lifetime (hah!) */
 #define	TCPTV_SRTTBASE	0			/* base roundtrip time;
 						   if 0, no idea yet */
 #define	TCPTV_RTOBASE	(  3*PR_SLOWHZ)		/* assumed RTO if no info */
@@ -96,9 +85,9 @@
 #define	TCPTV_PERSMIN	(  5*PR_SLOWHZ)		/* retransmit persistence */
 #define	TCPTV_PERSMAX	( 60*PR_SLOWHZ)		/* maximum persist interval */
 
-#define	TCPTV_KEEP_INIT	( 75*PR_SLOWHZ)		/* initial connect keep alive */
-#define	TCPTV_KEEP_IDLE	(120*60*PR_SLOWHZ)	/* dflt time before probing */
-#define	TCPTV_KEEPINTVL	( 75*PR_SLOWHZ)		/* default probe interval */
+#define	TCPTV_KEEP_INIT	( 75*hz)		/* initial connect keepalive */
+#define	TCPTV_KEEP_IDLE	(120*60*hz)		/* dflt time before probing */
+#define	TCPTV_KEEPINTVL	( 75*hz)		/* default probe interval */
 #define	TCPTV_KEEPCNT	8			/* max probes before drop */
 
 #define	TCPTV_MIN	(  1*PR_SLOWHZ)		/* minimum allowable value */
@@ -118,20 +107,36 @@
 /*
  * Force a time value to be in a certain range.
  */
-#define	TCPT_RANGESET(tv, value, tvmin, tvmax) { \
+#define	TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
 	(tv) = (value); \
 	if ((u_long)(tv) < (u_long)(tvmin)) \
 		(tv) = (tvmin); \
 	else if ((u_long)(tv) > (u_long)(tvmax)) \
 		(tv) = (tvmax); \
-}
+} while(0)
 
+/*
+ * Convert slow-timeout ticks to timer ticks.  We don't really want to do
+ * this as it is rather expensive, so this is only a transitional stage
+ * until we are able to update all the code which counts timer ticks.
+ */
+#define	TCPT_TICKS(stt)		((stt) * hz / PR_SLOWHZ)
+#define	TCPT_SLOWHZ(tt)		(((tt) * PR_SLOWHZ) / hz)
+
 #ifdef KERNEL
 extern int tcp_keepinit;		/* time to establish connection */
 extern int tcp_keepidle;		/* time before keepalive probes begin */
+extern int tcp_keepintvl;		/* time between keepalive probes */
 extern int tcp_maxidle;			/* time to drop after starting probes */
+extern int tcp_maxpersistidle;
 extern int tcp_ttl;			/* time to live for TCP segs */
 extern int tcp_backoff[];
-#endif
 
-#endif
+void	tcp_timer_2msl __P((void *xtp));
+void	tcp_timer_keep __P((void *xtp));
+void	tcp_timer_persist __P((void *xtp));
+void	tcp_timer_rexmt __P((void *xtp));
+
+#endif /* KERNEL */
+
+#endif /* !_NETINET_TCP_TIMER_H_ */
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.40
diff -u -r1.40 tcp_usrreq.c
--- tcp_usrreq.c	1999/01/20 17:31:59	1.40
+++ tcp_usrreq.c	1999/03/09 02:10:18
@@ -514,7 +514,7 @@
 	if (oinp) {
 		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
 		otp->t_state == TCPS_TIME_WAIT &&
-		    otp->t_duration < TCPTV_MSL &&
+		    (ticks - otp->t_starttime) < TCPTV_MSL &&
 		    (otp->t_flags & TF_RCVD_CC))
 			otp = tcp_close(otp);
 		else
@@ -540,7 +540,7 @@
 	soisconnecting(so);
 	tcpstat.tcps_connattempt++;
 	tp->t_state = TCPS_SYN_SENT;
-	tp->t_timer[TCPT_KEEP] = tcp_keepinit;
+	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
 	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
 	tcp_sendseqinit(tp);
 
@@ -787,7 +787,8 @@
 		soisdisconnected(tp->t_inpcb->inp_socket);
 		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
 		if (tp->t_state == TCPS_FIN_WAIT_2)
-			tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+			callout_reset(tp->tt_2msl, tcp_maxidle,
+				      tcp_timer_2msl, tp);
 	}
 	return (tp);
 }
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.50
diff -u -r1.50 tcp_var.h
--- tcp_var.h	1999/02/16 10:49:52	1.50
+++ tcp_var.h	1999/03/09 02:10:20
@@ -49,7 +49,10 @@
 	int	t_dupacks;		/* consecutive dup acks recd */
 	struct	tcpiphdr *t_template;	/* skeletal packet for transmit */
 
-	int	t_timer[TCPT_NTIMERS];	/* tcp timers */
+	struct	callout *tt_rexmt;	/* retransmit timer */
+	struct	callout *tt_persist;	/* retransmit persistence */
+	struct	callout *tt_keep;	/* keepalive */
+	struct	callout *tt_2msl;	/* 2*msl TIME_WAIT timer */
 
 	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
 	int	t_state;		/* state of this connection */
@@ -98,12 +101,12 @@
 					 */
 	u_int	t_maxopd;		/* mss plus options */
 
-	u_int	t_idle;			/* inactivity time */
-	u_long	t_duration;		/* connection duration */
-	int	t_rtt;			/* round trip time */
+	u_long	t_rcvtime;		/* inactivity time */
+	u_long	t_starttime;		/* time connection was established */
+	int	t_rtttime;		/* round trip time */
 	tcp_seq	t_rtseq;		/* sequence number being timed */
 
-	int	t_rxtcur;		/* current retransmit value */
+	int	t_rxtcur;		/* current retransmit value (ticks) */
 	u_int	t_maxseg;		/* maximum segment size */
 	int	t_srtt;			/* smoothed round-trip time */
 	int	t_rttvar;		/* variance in round-trip time */
Index: sys/sys/callout.h
===================================================================
RCS file: /home/cvs/src/sys/sys/callout.h,v
retrieving revision 1.12
diff -u -r1.12 callout.h
--- callout.h	1999/03/06 04:46:20	1.12
+++ callout.h	1999/03/09 02:10:22
@@ -75,7 +75,8 @@
 
 #define	callout_fired(c)	((c)->c_flags & CALLOUT_FIRED)
 void	callout_init __P((struct callout *));
-#define	callout_pending(c)	(((c)->c_flags & CALLOUT_PENDING) ? \
+#define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
+#define	callout_pending_at(c)	(((c)->c_flags & CALLOUT_PENDING) ? \
 				 ((c)->c_time - ticks) : 0)
 void	callout_reset __P((struct callout *, int, void (*)(void *), void *));
 void	callout_stop __P((struct callout *));



To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199903090257.VAA21463>