From owner-svn-src-projects@FreeBSD.ORG Wed Jul 1 17:06:58 2009 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2D144106568C; Wed, 1 Jul 2009 17:06:58 +0000 (UTC) (envelope-from lstewart@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 7BD518FC0A; Wed, 1 Jul 2009 17:06:56 +0000 (UTC) (envelope-from lstewart@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n61H6uY8018690; Wed, 1 Jul 2009 17:06:56 GMT (envelope-from lstewart@svn.freebsd.org) Received: (from lstewart@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n61H6uDB018684; Wed, 1 Jul 2009 17:06:56 GMT (envelope-from lstewart@svn.freebsd.org) Message-Id: <200907011706.n61H6uDB018684@svn.freebsd.org> From: Lawrence Stewart Date: Wed, 1 Jul 2009 17:06:56 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r195248 - projects/tcp_cc_8.x/sys/netinet X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 01 Jul 2009 17:06:58 -0000 Author: lstewart Date: Wed Jul 1 17:06:56 2009 New Revision: 195248 URL: http://svn.freebsd.org/changeset/base/195248 Log: - Complete the cwnd_init -> conn_init hook function transition. - Hide newreno hook functions from global namespace and access functions through the newreno_cc_algo struct. Unfortunately, we can no longer use C99 initializers to do all the hook function assignments as a result of this change, so do the remainder in the mod_init hook. 
- Remove the no longer required htcp_after_idle, newreno_cb_init and newreno_conn_init functions. Modified: projects/tcp_cc_8.x/sys/netinet/cc_cubic.c projects/tcp_cc_8.x/sys/netinet/cc_htcp.c projects/tcp_cc_8.x/sys/netinet/cc_module.h projects/tcp_cc_8.x/sys/netinet/cc_newreno.c projects/tcp_cc_8.x/sys/netinet/tcp_input.c Modified: projects/tcp_cc_8.x/sys/netinet/cc_cubic.c ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/cc_cubic.c Wed Jul 1 16:56:56 2009 (r195247) +++ projects/tcp_cc_8.x/sys/netinet/cc_cubic.c Wed Jul 1 17:06:56 2009 (r195248) @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include /* function prototypes */ +int cubic_mod_init(void); int cubic_cb_init(struct tcpcb *tp); void cubic_cb_destroy(struct tcpcb *tp); void cubic_pre_fr(struct tcpcb *tp, struct tcphdr *th); @@ -93,23 +94,28 @@ MALLOC_DEFINE(M_CUBIC, "cubic data", /* function pointers for various hooks into the TCP stack */ struct cc_algo cubic_cc_algo = { .name = "cubic", + .mod_init = cubic_mod_init, .cb_init = cubic_cb_init, .cb_destroy = cubic_cb_destroy, .conn_init = cubic_conn_init, .ack_received = cubic_ack_received, .pre_fr = cubic_pre_fr, .post_fr = cubic_post_fr, - .after_idle = newreno_after_idle, .after_timeout = cubic_after_timeout }; +int +cubic_mod_init(void) +{ + cubic_cc_algo.after_idle = newreno_cc_algo.after_idle; + return (0); +} + void cubic_conn_init(struct tcpcb *tp) { struct cubic *cubic_data = CC_DATA(tp); - newreno_conn_init(tp); - /* * Ensure we have a sane initial value for max_cwnd recorded. 
* Without this here bad things happen when entries from @@ -253,7 +259,7 @@ cubic_ack_received(struct tcpcb *tp, str if ((tp->snd_cwnd < tp->snd_ssthresh) || (tp->snd_ssthresh == TCP_MAXWIN << TCP_MAX_WINSHIFT) || (cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) - newreno_ack_received(tp, th); + newreno_cc_algo.ack_received(tp, th); else { /* num ticks since last congestion */ ticks_since_cong = ticks - cubic_data->t_last_cong; @@ -339,7 +345,7 @@ cubic_after_timeout(struct tcpcb *tp) if (tp->t_rxtshift >= 2) cubic_data->t_last_cong = ticks; - newreno_after_timeout(tp); + newreno_cc_algo.after_timeout(tp); } /* Modified: projects/tcp_cc_8.x/sys/netinet/cc_htcp.c ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/cc_htcp.c Wed Jul 1 16:56:56 2009 (r195247) +++ projects/tcp_cc_8.x/sys/netinet/cc_htcp.c Wed Jul 1 17:06:56 2009 (r195248) @@ -140,7 +140,6 @@ void htcp_pre_fr(struct tcpcb *tp, struc void htcp_post_fr(struct tcpcb *tp, struct tcphdr *th); void htcp_ack_received(struct tcpcb *tp, struct tcphdr *th); void htcp_after_timeout(struct tcpcb *tp); -void htcp_after_idle(struct tcpcb *tp); void htcp_ssthresh_update(struct tcpcb *tp); void htcp_record_rtt(struct tcpcb *tp); @@ -172,11 +171,9 @@ struct cc_algo htcp_cc_algo = { .mod_init = htcp_mod_init, .cb_init = htcp_cb_init, .cb_destroy = htcp_cb_destroy, - .conn_init = newreno_conn_init, .ack_received = htcp_ack_received, .pre_fr = htcp_pre_fr, .post_fr = htcp_post_fr, - .after_idle = htcp_after_idle, .after_timeout = htcp_after_timeout }; @@ -435,7 +432,7 @@ htcp_ack_received(struct tcpcb *tp, stru * Alpha will equal 1 for the first HTCP_DELTA_L ticks after the flow starts and after congestion */ if (htcp_data->alpha == 1 || tp->snd_cwnd < tp->snd_ssthresh) - newreno_ack_received(tp, th); + newreno_cc_algo.ack_received(tp, th); else { /* @@ -449,13 +446,6 @@ htcp_ack_received(struct tcpcb *tp, stru } } -void -htcp_after_idle(struct tcpcb *tp) -{ - 
printf("after_idle hook called\n"); - newreno_after_idle(tp); -} - /* * Reset the cwnd after a retransmission timeout */ @@ -475,7 +465,7 @@ htcp_after_timeout(struct tcpcb *tp) if (tp->t_rxtshift >= 2) htcp_data->t_last_cong = ticks; - newreno_after_timeout(tp); + newreno_cc_algo.after_timeout(tp); } /* @@ -545,6 +535,9 @@ skip: int htcp_mod_init(void) { + + htcp_cc_algo.after_idle = newreno_cc_algo.after_idle; + /* * the maximum time in ticks after a congestion event before alpha stops * increasing, due to the risk of overflow. Modified: projects/tcp_cc_8.x/sys/netinet/cc_module.h ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/cc_module.h Wed Jul 1 16:56:56 2009 (r195247) +++ projects/tcp_cc_8.x/sys/netinet/cc_module.h Wed Jul 1 17:06:56 2009 (r195248) @@ -36,18 +36,6 @@ #ifndef _NETINET_CC_MODULE_H_ #define _NETINET_CC_MODULE_H_ -/* - * NewReno CC functions - */ -int newreno_cb_init(struct tcpcb *tp); -void newreno_conn_init(struct tcpcb *tp); -void newreno_ack_received(struct tcpcb *tp, struct tcphdr *th); -void newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th); -void newreno_post_fr(struct tcpcb *tp, struct tcphdr *th); -void newreno_after_idle(struct tcpcb *tp); -void newreno_after_timeout(struct tcpcb *tp); -void newreno_ssthresh_update(struct tcpcb *tp); - #define DECLARE_CC_MODULE(ccname, ccalgo) \ static moduledata_t cc_##ccname = { \ #ccname, \ Modified: projects/tcp_cc_8.x/sys/netinet/cc_newreno.c ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/cc_newreno.c Wed Jul 1 16:56:56 2009 (r195247) +++ projects/tcp_cc_8.x/sys/netinet/cc_newreno.c Wed Jul 1 17:06:56 2009 (r195248) @@ -47,94 +47,25 @@ __FBSDID("$FreeBSD$"); #include #include +/* + * NewReno CC functions + */ +void newreno_ack_received(struct tcpcb *tp, struct tcphdr *th); +void newreno_ssthresh_update(struct tcpcb *tp, struct tcphdr *th); +void 
newreno_post_fr(struct tcpcb *tp, struct tcphdr *th); +void newreno_after_idle(struct tcpcb *tp); +void newreno_after_timeout(struct tcpcb *tp); + /* newreno cc function pointers */ struct cc_algo newreno_cc_algo = { .name = "newreno", - .cb_init = newreno_cb_init, - .conn_init = newreno_conn_init, .ack_received = newreno_ack_received, - .pre_fr = newreno_pre_fr, + .pre_fr = newreno_ssthresh_update, .post_fr = newreno_post_fr, .after_idle = newreno_after_idle, .after_timeout = newreno_after_timeout }; -int -newreno_cb_init(struct tcpcb *tp) -{ - return 0; -} - -/* - * update ssthresh to approx 1/2 of cwnd - */ -void -newreno_ssthresh_update(struct tcpcb *tp) -{ - u_int win; - - /* reset ssthresh */ - win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; - - if (win < 2) - win = 2; - - tp->snd_ssthresh = win * tp->t_maxseg; -} - -/* - * initial cwnd at the start of a connection - * if there is a hostcache entry for the foreign host, base cwnd on that - * if rfc3390 is enabled, set cwnd to approx 4 MSS as recommended - * otherwise use the sysctl variables configured by the administrator - */ -void -newreno_conn_init(struct tcpcb *tp) -{ - struct hc_metrics_lite metrics; - struct inpcb *inp = tp->t_inpcb; - struct socket *so = inp->inp_socket; - - /* - * Set the slow-start flight size depending on whether this - * is a local network or not. - * - * Extend this so we cache the cwnd too and retrieve it here. - * Make cwnd even bigger than RFC3390 suggests but only if we - * have previous experience with the remote host. Be careful - * not make cwnd bigger than remote receive window or our own - * send socket buffer. Maybe put some additional upper bound - * on the retrieved cwnd. Should do incremental updates to - * hostcache when cwnd collapses so next connection doesn't - * overloads the path again. - * - * RFC3390 says only do this if SYN or SYN/ACK didn't got lost. - * We currently check only in syncache_socket for that. 
- */ - - tcp_hc_get(&inp->inp_inc, &metrics); - -#define TCP_METRICS_CWND -#ifdef TCP_METRICS_CWND - if (metrics.rmx_cwnd) - tp->snd_cwnd = max(tp->t_maxseg, - min(metrics.rmx_cwnd / 2, - min(tp->snd_wnd, so->so_snd.sb_hiwat))); - else -#endif - if (V_tcp_do_rfc3390) - tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); -#ifdef INET6 - else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) || - (!isipv6 && in_localaddr(inp->inp_faddr))) -#else - else if (in_localaddr(inp->inp_faddr)) -#endif - tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local; - else - tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz; -} - /* * increase cwnd on receipt of a successful ACK * if cwnd <= ssthresh, increases by 1 MSS per ACK @@ -160,12 +91,22 @@ newreno_ack_received(struct tcpcb *tp, s } /* - * update the value of ssthresh before entering FR + * update ssthresh to approx 1/2 of cwnd + * argument "th" is unused but required so that the function can + * masquerade as a pre_fr hook function */ void -newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th) +newreno_ssthresh_update(struct tcpcb *tp, struct tcphdr *th) { - newreno_ssthresh_update(tp); + u_int win; + + /* reset ssthresh */ + win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; + + if (win < 2) + win = 2; + + tp->snd_ssthresh = win * tp->t_maxseg; } /* @@ -230,7 +171,7 @@ newreno_after_idle(struct tcpcb *tp) void newreno_after_timeout(struct tcpcb *tp) { - newreno_ssthresh_update(tp); + newreno_ssthresh_update(tp, NULL); /* * Close the congestion window down to one segment Modified: projects/tcp_cc_8.x/sys/netinet/tcp_input.c ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/tcp_input.c Wed Jul 1 16:56:56 2009 (r195247) +++ projects/tcp_cc_8.x/sys/netinet/tcp_input.c Wed Jul 1 17:06:56 2009 (r195248) @@ -234,11 +234,83 @@ cc_ack_received(struct tcpcb *tp, struct static void inline cc_conn_init(struct tcpcb *tp) { + struct hc_metrics_lite metrics; + struct inpcb 
*inp = tp->t_inpcb; + struct socket *so = inp->inp_socket; + int rtt; +#ifdef INET6 + int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; +#endif + INP_WLOCK_ASSERT(tp->t_inpcb); + tcp_hc_get(&inp->inp_inc, &metrics); + + if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { + tp->t_srtt = rtt; + tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; + TCPSTAT_INC(tcps_usedrtt); + if (metrics.rmx_rttvar) { + tp->t_rttvar = metrics.rmx_rttvar; + TCPSTAT_INC(tcps_usedrttvar); + } else { + /* default variation is +- 1 rtt */ + tp->t_rttvar = + tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; + } + TCPT_RANGESET(tp->t_rxtcur, + ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, + tp->t_rttmin, TCPTV_REXMTMAX); + } + if (metrics.rmx_ssthresh) { + /* + * There's some sort of gateway or interface + * buffer limit on the path. Use this to set + * the slow start threshold, but set the + * threshold to no less than 2*mss. + */ + tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); + TCPSTAT_INC(tcps_usedssthresh); + } + if (metrics.rmx_bandwidth) + tp->snd_bandwidth = metrics.rmx_bandwidth; + /* - * XXXLS: Should do ssthresh init in there as well + * Set the slow-start flight size depending on whether this + * is a local network or not. + * + * Extend this so we cache the cwnd too and retrieve it here. + * Make cwnd even bigger than RFC3390 suggests but only if we + * have previous experience with the remote host. Be careful + * not to make cwnd bigger than remote receive window or our own + * send socket buffer. Maybe put some additional upper bound + * on the retrieved cwnd. Should do incremental updates to + * hostcache when cwnd collapses so next connection doesn't + * overload the path again. + * + * RFC3390 says only do this if SYN or SYN/ACK didn't get lost. + * We currently check only in syncache_socket for that. 
*/ +#define TCP_METRICS_CWND +#ifdef TCP_METRICS_CWND + if (metrics.rmx_cwnd) + tp->snd_cwnd = max(tp->t_maxseg, + min(metrics.rmx_cwnd / 2, + min(tp->snd_wnd, so->so_snd.sb_hiwat))); + else +#endif + if (V_tcp_do_rfc3390) + tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, +4380)); +#ifdef INET6 + else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) || + (!isipv6 && in_localaddr(inp->inp_faddr))) +#else + else if (in_localaddr(inp->inp_faddr)) +#endif + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local; + else + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz; if (CC_ALGO(tp)->conn_init != NULL) CC_ALGO(tp)->conn_init(tp); @@ -3214,15 +3286,13 @@ tcp_mss_update(struct tcpcb *tp, int off void tcp_mss(struct tcpcb *tp, int offer) { - int rtt, mss; + int mss; u_long bufsize; struct inpcb *inp; struct socket *so; struct hc_metrics_lite metrics; int mtuflags = 0; -#ifdef INET6 - int isipv6; -#endif + KASSERT(tp != NULL, ("%s: tp == NULL", __func__)); INIT_VNET_INET(tp->t_vnet); @@ -3230,9 +3300,6 @@ tcp_mss(struct tcpcb *tp, int offer) mss = tp->t_maxseg; inp = tp->t_inpcb; -#ifdef INET6 - isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; -#endif /* * If there's a pipesize, change the socket buffer to that size, @@ -3272,37 +3339,6 @@ tcp_mss(struct tcpcb *tp, int offer) (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL); } SOCKBUF_UNLOCK(&so->so_rcv); - /* - * While we're here, check the others too. 
- */ - if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { - tp->t_srtt = rtt; - tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; - TCPSTAT_INC(tcps_usedrtt); - if (metrics.rmx_rttvar) { - tp->t_rttvar = metrics.rmx_rttvar; - TCPSTAT_INC(tcps_usedrttvar); - } else { - /* default variation is +- 1 rtt */ - tp->t_rttvar = - tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; - } - TCPT_RANGESET(tp->t_rxtcur, - ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, - tp->t_rttmin, TCPTV_REXMTMAX); - } - if (metrics.rmx_ssthresh) { - /* - * There's some sort of gateway or interface - * buffer limit on the path. Use this to set - * the slow start threshhold, but set the - * threshold to no less than 2*mss. - */ - tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh); - TCPSTAT_INC(tcps_usedssthresh); - } - if (metrics.rmx_bandwidth) - tp->snd_bandwidth = metrics.rmx_bandwidth; /* Check the interface for TSO capabilities. */ if (mtuflags & CSUM_TSO)