From owner-p4-projects@FreeBSD.ORG Thu Sep 7 16:55:52 2006 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 66F1D16A52D; Thu, 7 Sep 2006 16:55:52 +0000 (UTC) X-Original-To: perforce@freebsd.org Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 2829016A50B for ; Thu, 7 Sep 2006 16:55:52 +0000 (UTC) (envelope-from mjacob@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115]) by mx1.FreeBSD.org (Postfix) with ESMTP id B572E43D79 for ; Thu, 7 Sep 2006 16:55:33 +0000 (GMT) (envelope-from mjacob@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.6/8.13.6) with ESMTP id k87GtX6j039047 for ; Thu, 7 Sep 2006 16:55:33 GMT (envelope-from mjacob@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.6/8.13.4/Submit) id k87GtXsW039044 for perforce@freebsd.org; Thu, 7 Sep 2006 16:55:33 GMT (envelope-from mjacob@freebsd.org) Date: Thu, 7 Sep 2006 16:55:33 GMT Message-Id: <200609071655.k87GtXsW039044@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to mjacob@freebsd.org using -f From: Matt Jacob To: Perforce Change Reviews Cc: Subject: PERFORCE change 105799 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Sep 2006 16:55:52 -0000 http://perforce.freebsd.org/chv.cgi?CH=105799 Change 105799 by mjacob@newisp on 2006/09/07 16:55:22 IFC. Affected files ... .. //depot/projects/newisp/amd64/amd64/machdep.c#2 integrate .. //depot/projects/newisp/i386/i386/machdep.c#2 integrate .. //depot/projects/newisp/netinet/tcp_input.c#3 integrate .. 
//depot/projects/newisp/netinet/tcp_output.c#2 integrate .. //depot/projects/newisp/netinet/tcp_subr.c#4 integrate .. //depot/projects/newisp/netinet/tcp_timer.c#3 integrate .. //depot/projects/newisp/netinet/tcp_timer.h#2 integrate .. //depot/projects/newisp/netinet/tcp_var.h#3 integrate Differences ... ==== //depot/projects/newisp/amd64/amd64/machdep.c#2 (text+ko) ==== @@ -39,7 +39,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.651 2006/07/27 19:47:22 jhb Exp $"); +__FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.652 2006/09/07 15:03:02 jhb Exp $"); #include "opt_atalk.h" #include "opt_atpic.h" @@ -160,8 +160,10 @@ long Maxmem = 0; long realmem = 0; -vm_paddr_t phys_avail[20]; -vm_paddr_t dump_avail[20]; +#define PHYSMAP_SIZE (2 * 30) + +vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; +vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) @@ -832,8 +834,6 @@ } #endif -#define PHYSMAP_SIZE (2 * 20) - u_int basemem; /* ==== //depot/projects/newisp/i386/i386/machdep.c#2 (text+ko) ==== @@ -38,7 +38,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.633 2006/08/09 23:37:30 imp Exp $"); +__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.634 2006/09/07 15:03:02 jhb Exp $"); #include "opt_apic.h" #include "opt_atalk.h" @@ -188,8 +188,10 @@ long Maxmem = 0; long realmem = 0; -vm_paddr_t phys_avail[10]; -vm_paddr_t dump_avail[10]; +#define PHYSMAP_SIZE (2 * 16) + +vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; +vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) @@ -1614,8 +1616,6 @@ ssd->ssd_gran = sd->sd_gran; } -#define PHYSMAP_SIZE (2 * 8) - /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and ==== 
//depot/projects/newisp/netinet/tcp_input.c#3 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 - * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.305 2006/09/06 21:51:58 andre Exp $ + * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.306 2006/09/07 13:06:00 ru Exp $ */ #include "opt_ipfw.h" /* for ipfw_fwd */ @@ -3187,7 +3187,7 @@ const int isipv6 = 0; #endif - /* tcbinfo lock required for tcp_twclose(), tcp_2msl_reset. */ + /* tcbinfo lock required for tcp_twclose(), tcp_timer_2msl_reset(). */ INP_INFO_WLOCK_ASSERT(&tcbinfo); INP_LOCK_ASSERT(inp); @@ -3256,7 +3256,7 @@ if (thflags & TH_FIN) { seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); if (seq + 1 == tw->rcv_nxt) - tcp_timer_2msl_reset(tw, 2 * tcp_msl, 1); + tcp_timer_2msl_reset(tw, 1); } /* ==== //depot/projects/newisp/netinet/tcp_output.c#2 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 - * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.115 2006/02/23 21:14:34 qingli Exp $ + * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.116 2006/09/07 12:53:01 andre Exp $ */ #include "opt_inet.h" @@ -105,6 +105,10 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno, 0, "Enable NewReno Algorithms"); +int tcp_do_tso = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW, + &tcp_do_tso, 0, "Enable TCP Segmentation Offload"); + /* * Tcp output routine: figure out what should be sent and send it. */ @@ -127,6 +131,7 @@ int i, sack_rxmit; int sack_bytes_rxmt; struct sackhole *p; + int tso = 0; #if 0 int maxburst = TCP_MAXBURST; #endif @@ -376,12 +381,34 @@ /* * len will be >= 0 after this point. Truncate to the maximum - * segment length and ensure that FIN is removed if the length - * no longer contains the last data byte. + * segment length or enable TCP Segmentation Offloading (if supported + * by hardware) and ensure that FIN is removed if the length no longer + * contains the last data byte. 
+ * + * TSO may only be used if we are in a pure bulk sending state. The + * presence of TCP-MD5, SACK retransmits, SACK advertisements and + * IP options prevents using TSO. With TSO the TCP header is the same + * (except for the sequence number) for all generated packets. This + * makes it impossible to transmit any options which vary per generated + * segment or packet. + * + * The length of TSO bursts is limited to TCP_MAXWIN. That limit and + * removal of FIN (if not already caught here) are handled later after + * the exact length of the TCP options is known. */ if (len > tp->t_maxseg) { - len = tp->t_maxseg; - sendalot = 1; + if ((tp->t_flags & TF_TSO) && tcp_do_tso && + ((tp->t_flags & TF_SIGNATURE) == 0) && + tp->rcv_numsacks == 0 && sack_rxmit == 0 && + tp->t_inpcb->inp_options == NULL && + tp->t_inpcb->in6p_options == NULL && + tp->t_inpcb->inp_sp == NULL) { + tso = 1; + } else { + len = tp->t_maxseg; + sendalot = 1; + tso = 0; + } } if (sack_rxmit) { if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) @@ -397,7 +424,7 @@ * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * - * - We have a full segment + * - We have a full segment (or more with TSO) * - This is the last buffer in a write()/send() and we are * either idle or running NODELAY * - we've timed out (e.g. persist timer) @@ -406,7 +433,7 @@ * - we need to retransmit */ if (len) { - if (len == tp->t_maxseg) + if (len >= tp->t_maxseg) goto send; /* * NOTE! on localhost connections an 'ack' from the remote @@ -702,14 +729,24 @@ * bump the packet length beyond the t_maxopd length. * Clear the FIN bit because we cut off the tail of * the segment. + * + * When doing TSO limit a burst to TCP_MAXWIN and set the + * flag to continue sending and prevent the last segment + * from being fractional thus making them all equal sized. */ if (len + optlen + ipoptlen > tp->t_maxopd) { - /* - * If there is still more to send, don't close the connection. 
- */ flags &= ~TH_FIN; - len = tp->t_maxopd - optlen - ipoptlen; - sendalot = 1; + if (tso) { + if (len > TCP_MAXWIN) { + len = TCP_MAXWIN - TCP_MAXWIN % + (tp->t_maxopd - optlen); + sendalot = 1; + } else if (tp->t_flags & TF_NEEDFIN) + sendalot = 1; + } else { + len = tp->t_maxopd - optlen - ipoptlen; + sendalot = 1; + } } /*#ifdef DIAGNOSTIC*/ @@ -947,6 +984,16 @@ } /* + * Enable TSO and specify the size of the segments. + * The TCP pseudo header checksum is always provided. + * XXX: Fixme: This is currently not the case for IPv6. + */ + if (tso) { + m->m_pkthdr.csum_flags = CSUM_TSO; + m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; + } + + /* * In transmit state, time the transmission and arrange for * the retransmit. In persist state, just set snd_max. */ @@ -1119,11 +1166,22 @@ } if (error == EMSGSIZE) { /* - * ip_output() will have already fixed the route - * for us. tcp_mtudisc() will, as its last action, - * initiate retransmission, so it is important to - * not do so here. + * For some reason the interface we used initially + * to send segments changed to another or lowered + * its MTU. + * + * tcp_mtudisc() will find out the new MTU and as + * its last action, initiate retransmission, so it + * is important to not do so here. + * + * If TSO was active we either got an interface + * without TSO capabilities or TSO was turned off. + * Disable it for this connection as well and + * immediately retry with MSS sized segments generated + * by this function. */ + if (tso) + tp->t_flags &= ~TF_TSO; tcp_mtudisc(tp->t_inpcb, 0); return 0; } ==== //depot/projects/newisp/netinet/tcp_subr.c#4 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.260 2006/09/06 21:51:58 andre Exp $ + * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.261 2006/09/07 13:06:00 ru Exp $ */ #include "opt_compat.h" @@ -1736,7 +1736,7 @@ { struct tcptw *tw; struct inpcb *inp; - int tw_time, acknow; + int acknow; struct socket *so; INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_reset(). */ @@ -1781,7 +1781,6 @@ * be used for fin-wait-2 state also, then we may need * a ts_recent from the last segment. */ - tw_time = 2 * tcp_msl; acknow = tp->t_flags & TF_ACKNOW; /* @@ -1803,7 +1802,7 @@ tcp_twrespond(tw, TH_ACK); inp->inp_ppcb = tw; inp->inp_vflag |= INP_TIMEWAIT; - tcp_timer_2msl_reset(tw, tw_time, 0); + tcp_timer_2msl_reset(tw, 0); /* * If the inpcb owns the sole reference to the socket, then we can ==== //depot/projects/newisp/netinet/tcp_timer.c#3 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 - * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.83 2006/09/06 13:56:35 glebius Exp $ + * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.85 2006/09/07 13:06:00 ru Exp $ */ #include "opt_inet6.h" @@ -230,46 +230,30 @@ } /* - * The timed wait lists contain references to each of the TCP sessions - * currently TIME_WAIT state. The list pointers, including the list pointers - * in each tcptw structure, are protected using the global tcbinfo lock, - * which must be held over list iteration and modification. + * The timed wait queue contains references to each of the TCP sessions + * currently in the TIME_WAIT state. The queue pointers, including the + * queue pointers in each tcptw structure, are protected using the global + * tcbinfo lock, which must be held over queue iteration and modification. 
*/ -struct twlist { - LIST_HEAD(, tcptw) tw_list; - struct tcptw tw_tail; -}; -#define TWLIST_NLISTS 2 -static struct twlist twl_2msl[TWLIST_NLISTS]; -static struct twlist *tw_2msl_list[] = { &twl_2msl[0], &twl_2msl[1], NULL }; +static TAILQ_HEAD(, tcptw) twq_2msl; void tcp_timer_init(void) { - int i; - struct twlist *twl; - for (i = 0; i < TWLIST_NLISTS; i++) { - twl = &twl_2msl[i]; - LIST_INIT(&twl->tw_list); - LIST_INSERT_HEAD(&twl->tw_list, &twl->tw_tail, tw_2msl); - } + TAILQ_INIT(&twq_2msl); } void -tcp_timer_2msl_reset(struct tcptw *tw, int timeo, int rearm) +tcp_timer_2msl_reset(struct tcptw *tw, int rearm) { - int i; - struct tcptw *tw_tail; INP_INFO_WLOCK_ASSERT(&tcbinfo); INP_LOCK_ASSERT(tw->tw_inpcb); if (rearm) - LIST_REMOVE(tw, tw_2msl); - tw->tw_time = timeo + ticks; - i = timeo > tcp_msl ? 1 : 0; - tw_tail = &twl_2msl[i].tw_tail; - LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl); + TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); + tw->tw_time = ticks + 2 * tcp_msl; + TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl); } void @@ -277,31 +261,23 @@ { INP_INFO_WLOCK_ASSERT(&tcbinfo); - LIST_REMOVE(tw, tw_2msl); + TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); } struct tcptw * tcp_timer_2msl_tw(int reuse) { - struct tcptw *tw, *tw_tail; - struct twlist *twl; - int i; + struct tcptw *tw; INP_INFO_WLOCK_ASSERT(&tcbinfo); - for (i = 0; i < TWLIST_NLISTS; i++) { - twl = tw_2msl_list[i]; - tw_tail = &twl->tw_tail; - - for (;;) { - tw = LIST_FIRST(&twl->tw_list); - if (tw == tw_tail || (!reuse && tw->tw_time > ticks)) - break; - INP_LOCK(tw->tw_inpcb); - tcp_twclose(tw, reuse); - if (reuse) - return (tw); - } - + for (;;) { + tw = TAILQ_FIRST(&twq_2msl); + if (tw == NULL || (!reuse && tw->tw_time > ticks)) + break; + INP_LOCK(tw->tw_inpcb); + tcp_twclose(tw, reuse); + if (reuse) + return (tw); } return (NULL); } ==== //depot/projects/newisp/netinet/tcp_timer.h#2 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 - * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.29 2006/08/11 21:15:23 mohans Exp $ + * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.30 2006/09/07 13:06:00 ru Exp $ */ #ifndef _NETINET_TCP_TIMER_H_ @@ -156,7 +156,7 @@ void tcp_timer_2msl(void *xtp); struct tcptw * tcp_timer_2msl_tw(int _reuse); /* XXX temporary */ -void tcp_timer_2msl_reset(struct tcptw *_tw, int _timeo, int rearm); +void tcp_timer_2msl_reset(struct tcptw *_tw, int rearm); void tcp_timer_2msl_stop(struct tcptw *_tw); void tcp_timer_keep(void *xtp); void tcp_timer_persist(void *xtp); ==== //depot/projects/newisp/netinet/tcp_var.h#3 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 - * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.135 2006/09/06 21:51:58 andre Exp $ + * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.136 2006/09/07 13:06:00 ru Exp $ */ #ifndef _NETINET_TCP_VAR_H_ @@ -276,7 +276,7 @@ u_long t_recent; u_long t_starttime; int tw_time; - LIST_ENTRY(tcptw) tw_2msl; + TAILQ_ENTRY(tcptw) tw_2msl; }; #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)