From owner-svn-src-projects@FreeBSD.ORG Tue Jul 14 14:41:48 2009 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 664AC106564A; Tue, 14 Jul 2009 14:41:48 +0000 (UTC) (envelope-from lstewart@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 53A468FC17; Tue, 14 Jul 2009 14:41:48 +0000 (UTC) (envelope-from lstewart@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n6EEfmwr062591; Tue, 14 Jul 2009 14:41:48 GMT (envelope-from lstewart@svn.freebsd.org) Received: (from lstewart@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n6EEfm7Z062588; Tue, 14 Jul 2009 14:41:48 GMT (envelope-from lstewart@svn.freebsd.org) Message-Id: <200907141441.n6EEfm7Z062588@svn.freebsd.org> From: Lawrence Stewart Date: Tue, 14 Jul 2009 14:41:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r195678 - projects/tcp_cc_8.x/sys/netinet X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Jul 2009 14:41:48 -0000 Author: lstewart Date: Tue Jul 14 14:41:48 2009 New Revision: 195678 URL: http://svn.freebsd.org/changeset/base/195678 Log: Cleanup CUBIC code to prepare for a standalone initial public release. Modified: projects/tcp_cc_8.x/sys/netinet/cc_cubic.c projects/tcp_cc_8.x/sys/netinet/cc_cubic.h Modified: projects/tcp_cc_8.x/sys/netinet/cc_cubic.c ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/cc_cubic.c Tue Jul 14 11:53:21 2009 (r195677) +++ projects/tcp_cc_8.x/sys/netinet/cc_cubic.c Tue Jul 14 14:41:48 2009 (r195678) @@ -61,7 +61,6 @@ __FBSDID("$FreeBSD$"); #include #include -/* function prototypes */ int cubic_mod_init(void); int cubic_cb_init(struct tcpcb *tp); void cubic_cb_destroy(struct tcpcb *tp); @@ -75,15 +74,15 @@ void cubic_conn_init(struct tcpcb *tp); void cubic_record_rtt(struct tcpcb *tp); struct cubic { - /* cwnd at the most recent congestion event */ + /* cwnd at the most recent congestion event. */ u_long max_cwnd; - /* cwnd at the previous congestion event */ + /* cwnd at the previous congestion event. */ u_long prev_max_cwnd; - /* time of last congestion event in ticks */ + /* Time of last congestion event in ticks. */ u_long t_last_cong; - /* minimum observed rtt in ticks */ + /* Minimum observed rtt in ticks. */ u_long min_rtt_ticks; - /* number of congestion events */ + /* Number of congestion events. */ u_long num_cong_events; }; @@ -91,7 +90,6 @@ MALLOC_DECLARE(M_CUBIC); MALLOC_DEFINE(M_CUBIC, "cubic data", "Per connection data required for the CUBIC congestion algorithm"); -/* function pointers for various hooks into the TCP stack */ struct cc_algo cubic_cc_algo = { .name = "cubic", .mod_init = cubic_mod_init, @@ -125,9 +123,7 @@ cubic_conn_init(struct tcpcb *tp) } /* - * Initialise CUBIC on the specified TCP control block. Creates a - * new struct for CUBIC specific data and sticks a pointer to it - * in the control block + * Initialise CUBIC on the specified TCP control block. */ int cubic_cb_init(struct tcpcb *tp) @@ -137,9 +133,9 @@ cubic_cb_init(struct tcpcb *tp) cubic_data = malloc(sizeof(struct cubic), M_CUBIC, M_NOWAIT); if (cubic_data == NULL) - return 1; + return (ENOMEM); - /* init some key cubic values with sensible defaults */ + /* Init some key variables with sensible defaults. */ cubic_data->t_last_cong = ticks; cubic_data->min_rtt_ticks = TCPTV_SRTTBASE; cubic_data->max_cwnd = 0; @@ -147,8 +143,8 @@ cubic_cb_init(struct tcpcb *tp) cubic_data->num_cong_events = 0; CC_DATA(tp) = cubic_data; - - return 0; + + return (0); } /* @@ -163,7 +159,7 @@ cubic_cb_destroy(struct tcpcb *tp) } /* - * Perform any necesary tasks before we enter fast recovery + * Perform any necesary tasks before we enter fast recovery. */ void cubic_pre_fr(struct tcpcb *tp, struct tcphdr *th) @@ -175,12 +171,10 @@ cubic_pre_fr(struct tcpcb *tp, struct tc cubic_ssthresh_update(tp); /* - * record the current cwnd at the point of congestion so it can be used - * as the basis for resetting cwnd after exiting fr + * Record the current cwnd at the point of congestion so it can be used + * as the basis for resetting cwnd after exiting FR. */ cubic_data->max_cwnd = tp->snd_cwnd; - - printf("congestion event started (max_cwnd: %ld)\n", cubic_data->max_cwnd); } /* @@ -191,15 +185,11 @@ cubic_post_fr(struct tcpcb *tp, struct t { struct cubic *cubic_data = CC_DATA(tp); - /* - * grab the current time and record it so we know when the most recent - * congestion event was - */ + /* Record the current time as the most recent congestion event. */ cubic_data->t_last_cong = ticks; - /* fast convergence heuristic */ + /* Fast convergence heuristic. */ if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd) { - printf("fast convergence heuristic kicked in! (max_cwnd: %d\t prev_max_cwnd: %d\n", (int)cubic_data->max_cwnd, (int)cubic_data->prev_max_cwnd); cubic_data->prev_max_cwnd = cubic_data->max_cwnd; cubic_data->max_cwnd = (cubic_data->max_cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT; @@ -208,20 +198,16 @@ cubic_post_fr(struct tcpcb *tp, struct t cubic_data->prev_max_cwnd = cubic_data->max_cwnd; /* - * if inflight data is less than ssthresh, set cwnd conservatively + * If inflight data is less than ssthresh, set cwnd conservatively * to avoid a burst of data, as suggested in the NewReno RFC. * Otherwise, use the CUBIC method. */ if (th && SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max)) tp->snd_cwnd = tp->snd_max - th->th_ack + tp->t_maxseg; - else { - /* update cwnd based on beta and adjusted max_cwnd */ + else + /* Update cwnd based on beta and adjusted max_cwnd. */ tp->snd_cwnd = max(1,((CUBIC_BETA * cubic_data->max_cwnd) >> CUBIC_SHIFT)); - printf("cubic_post_fr - cwnd: %ld\tmax_cwnd: %ld\n", tp->snd_cwnd, cubic_data->max_cwnd); - } - - printf("congestion event over\n"); } void @@ -230,12 +216,12 @@ cubic_record_rtt(struct tcpcb *tp) struct cubic *cubic_data = CC_DATA(tp); int t_srtt_ticks = tp->t_srtt / TCP_RTT_SCALE; - /* XXX: Should there be some hysteresis for minrtt? */ + /* XXXLS: Should there be some hysteresis for minrtt? */ /* - * record the current SRTT as our minrtt if it's the smallest we've + * Record the current SRTT as our minrtt if it's the smallest we've * seen or minrtt is currently equal to its initialised value. - * Ignore srtt until a min number of samples have been taken + * Ignore srtt until a min number of samples have been taken. */ if ((t_srtt_ticks < cubic_data->min_rtt_ticks || cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) && @@ -251,8 +237,6 @@ cubic_ack_received(struct tcpcb *tp, str { struct cubic *cubic_data = CC_DATA(tp); u_long w_newreno, w_cubic_next, ticks_since_cong; - static u_long last_print_ticks = 0; - int print = 0; cubic_record_rtt(tp); @@ -261,73 +245,46 @@ cubic_ack_received(struct tcpcb *tp, str (cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) newreno_cc_algo.ack_received(tp, th); else { - /* num ticks since last congestion */ + /* Ticks since last congestion. */ ticks_since_cong = ticks - cubic_data->t_last_cong; - if ((ticks - last_print_ticks) >= (cubic_data->min_rtt_ticks / 2)) { - - print = 1; - last_print_ticks = ticks; - } - - if (print) - printf("rtt_ticks: %ld\tticks_since_cong: %ld\n", cubic_data->min_rtt_ticks, ticks_since_cong); - w_newreno = reno_cwnd( ticks_since_cong, cubic_data->min_rtt_ticks, cubic_data->max_cwnd, tp->t_maxseg ); - if (print) - printf("reno_cwnd(%ld,%ld,%ld,%d): %ld\n", ticks_since_cong, cubic_data->min_rtt_ticks, cubic_data->max_cwnd, tp->t_maxseg, w_newreno); - - //w_cubic = cubic_cwnd(ticks_since_cong, cubic_data->max_cwnd, tp->t_maxseg); - //printf("cubic_cwnd(%ld,%ld,%d): %ld (w_cubic)\n", ticks_since_cong, cubic_data->max_cwnd, tp->t_maxseg, w_cubic); - w_cubic_next = cubic_cwnd( ticks_since_cong + - cubic_data->min_rtt_ticks, + cubic_data->min_rtt_ticks, cubic_data->max_cwnd, tp->t_maxseg ); - - if (print) - printf("cubic_cwnd(%ld,%ld,%d): %ld (w_cubic_next)\n", ticks_since_cong + cubic_data->min_rtt_ticks, cubic_data->max_cwnd, tp->t_maxseg, w_cubic_next); - - if (print) - printf("pre\tmax_cwnd: %ld\tsnd_cwnd: %ld\tw_newreno: %ld\tw_cubic_next: %ld\n", cubic_data->max_cwnd, tp->snd_cwnd, w_newreno, w_cubic_next); - - if (w_cubic_next < w_newreno) { - /* we are in TCP-friendly region, follow reno cwnd growth */ + if (w_cubic_next < w_newreno) + /* TCP-friendly region, follow reno cwnd growth. */ tp->snd_cwnd = w_newreno; - } else if (tp->snd_cwnd < w_cubic_next) { - /* else we are in the concave or convex growth regions */ - if (print) - printf("incr: %lu\n", (w_cubic_next * tp->t_maxseg) / tp->snd_cwnd); - //tp->snd_cwnd += max(1, (w_cubic_next - w_cubic) / tp->snd_cwnd); - //printf("incr: %d\n", max(1, (w_cubic_next - tp->snd_cwnd) / tp->t_maxseg)); - /* XXX: Test under what conditions the following will truncate */ - tp->snd_cwnd += (u_long)(((uint64_t)(w_cubic_next * tp->t_maxseg)) / tp->snd_cwnd); - } + else if (tp->snd_cwnd < w_cubic_next) + /* Concave or convex region, follow CUBIC cwnd growth. + * XXXLS: Test under what conditions + * the following will truncate. + */ + tp->snd_cwnd += (u_long)(((uint64_t)(w_cubic_next + * tp->t_maxseg)) / tp->snd_cwnd); /* - * if we're not in slow start and we're probing for a new cwnd limit + * If we're not in slow start and we're probing for a new cwnd limit * at the start of a connection (happens when hostcache has a relevant entry), - * keep updating our current estimate of the max_cwnd + * keep updating our current estimate of the max_cwnd. */ - if (cubic_data->num_cong_events == 0 && - cubic_data->max_cwnd < tp->snd_cwnd) + if (cubic_data->num_cong_events == 0 + && cubic_data->max_cwnd < tp->snd_cwnd) cubic_data->max_cwnd = tp->snd_cwnd; - - if (print) - printf("post\tmax_cwnd: %ld\tsnd_cwnd: %ld\n\n", cubic_data->max_cwnd, tp->snd_cwnd); } } /* - * Reset the cwnd after a retransmission timeout + * Reset the cwnd after a retransmission timeout. */ void cubic_after_timeout(struct tcpcb *tp) @@ -337,7 +294,7 @@ cubic_after_timeout(struct tcpcb *tp) cubic_ssthresh_update(tp); /* - * grab the current time and record it so we know when the most recent + * Grab the current time and record it so we know when the most recent * congestion event was. Only record it when the timeout has fired more * than once, as there is a reasonable chance the first one is a false alarm * and may not indicate congestion. @@ -355,7 +312,7 @@ void cubic_ssthresh_update(struct tcpcb *tp) { /* - * on the first congestion event, set ssthresh to cwnd * 0.5, on + * On the first congestion event, set ssthresh to cwnd * 0.5, on * subsequent congestion events, set it to cwnd * beta. */ if (tp->snd_ssthresh == (TCP_MAXWIN << TCP_MAX_WINSHIFT)) Modified: projects/tcp_cc_8.x/sys/netinet/cc_cubic.h ============================================================================== --- projects/tcp_cc_8.x/sys/netinet/cc_cubic.h Tue Jul 14 11:53:21 2009 (r195677) +++ projects/tcp_cc_8.x/sys/netinet/cc_cubic.h Tue Jul 14 14:41:48 2009 (r195678) @@ -34,33 +34,30 @@ #ifndef _NETINET_CC_CUBIC_H_ #define _NETINET_CC_CUBIC_H_ -/* needed for rdtsc() (performance analysis) */ -#include - -/* Number of bits of precision for fixed point math calcs */ +/* Number of bits of precision for fixed point math calcs. */ #define CUBIC_SHIFT 8 #define CUBIC_SHIFT_4 32 -/* 0.5 with a shift << 8 */ +/* 0.5 with a shift << 8. */ #define RENO_BETA 128 -/* ~0.8 with a shift << 8 */ +/* ~0.8 with a shift << 8. */ #define CUBIC_BETA 204 -/* ~0.2 with a shift << 8 */ +/* ~0.2 with a shift << 8. */ #define ONE_MINUS_CUBIC_BETA 51 -/* ~0.4 with a shift << 8 */ +/* ~0.4 with a shift << 8. */ #define CUBIC_C_FACTOR 102 -/* CUBIC fast convergence factor ~0.9 with a shift << 8 */ +/* CUBIC fast convergence factor ~0.9 with a shift << 8. */ #define CUBIC_FC_FACTOR 230 -/* Don't trust s_rtt until this many rtt samples have been taken */ +/* Don't trust s_rtt until this many rtt samples have been taken. */ #define CUBIC_MIN_RTT_SAMPLES 8 -/* Userspace only bits */ +/* Userspace only bits. */ #ifndef _KERNEL extern int hz; @@ -89,38 +86,40 @@ theoretical_reno_cwnd( u_long ticks_sinc return (wmax * 0.5) + ((ticks_since_cong/(float)rtt_ticks) * smss); } -#endif /* _KERNEL*/ +#endif /* !_KERNEL */ /* - * calc K (adapted from Apple Computer Technical Report #KT-32) + * Compute the CUBIC K value used in the cwnd calculation, using an + * implementation of equation 2 in draft-rhee-tcpm-cubic-02. The method used + * here is adapted from Apple Computer Technical Report #KT-32. */ static __inline -long cubic_k(u_long wmax_pkts) +int64_t cubic_k(u_long wmax_pkts) { + register int64_t s = 0, K = 0; register uint16_t p = 0; - register long s = 0, K = 0; - /* (wmax * beta)/C with CUBIC_SHIFT worth of precision */ + /* (wmax * beta)/C with CUBIC_SHIFT worth of precision. */ s = ((wmax_pkts * ONE_MINUS_CUBIC_BETA) << CUBIC_SHIFT) / CUBIC_C_FACTOR; - //printf("s: %d\n", s); + /* printf("s: %lld\n", s); */ /* - * rebase s such that it is between 1 and 1/8 with - * a shift of CUBIC_SHIFT + * Rebase s such that it is between 1 and 1/8 with + * a shift of CUBIC_SHIFT. */ while (s >= 256) { - s >>= 3; /* divide by 8 */ + s >>= 3; p++; } - /* s is now between 1/8 and 1 (shifted by CUBIC_SHIFT) */ - - //printf("rebased s: %d\n", s); + /* s is now between 1/8 and 1 (shifted by CUBIC_SHIFT). */ + /* printf("rebased s: %lld\n", s); */ /* - * Some magic constants taken from the Apple TR with appropriate shifts + * Some magic constants taken from the Apple TR with + * appropriate shifts: * 275 == 1.072302 << CUBIC_SHIFT (8) * 98 == 0.3812513 << CUBIC_SHIFT (8) * 120 == 0.46946116 << CUBIC_SHIFT (8) @@ -129,56 +128,55 @@ long cubic_k(u_long wmax_pkts) (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT); /* - * multiply by 2^p to undo the "divide by 8" transform from the - * previous while loop + * Multiply by 2^p to undo the "divide by 8" transform from the + * while loop. */ return (K <<= p); } /* - * Acknowledgments: Kip Macy + * Compute the new CUBIC cwnd value using an implementation of equation 1 in + * draft-rhee-tcpm-cubic-02. + * XXXLS: Characterise bounds for overflow. + * Debugging acknowledgments: Kip Macy */ static __inline u_long cubic_cwnd(u_long ticks_since_cong, u_long wmax, u_int smss) { - long K; - int64_t cwnd; - //int64_t start, end; - - //start = rdtsc(); + int64_t cwnd, K; + /* + * int64_t start, end; + * start = rdtsc(); + */ K = cubic_k(wmax / smss); - //printf("K: %d\n", K); + /* K is in fixed point form with CUBIC_SHIFT worth of precision */ - /* - * we now have K in fixed point form with - * CUBIC_SHIFT worth of precision - */ - - /* t - K, with CUBIC_SHIFT worth of precision */ + /* t - K, with CUBIC_SHIFT worth of precision. */ cwnd = ((int64_t)(ticks_since_cong << CUBIC_SHIFT) - (K * hz)) / hz; - //printf("t-k: %lld\n", cwnd); + /* printf("t-k: %lld\n", cwnd); */ - /* (t - K)^3, with CUBIC_SHIFT^3 worth of precision */ + /* (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */ cwnd *= (cwnd * cwnd); - //printf("(t-k)^3: %lld\n", cwnd); + /* printf("(t-k)^3: %lld\n", cwnd); */ /* * C(t - K)^3 + wmax * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above, - * and an extra from multiplying through by CUBIC_C_FACTOR + * and an extra from multiplying through by CUBIC_C_FACTOR. */ cwnd = ((cwnd * CUBIC_C_FACTOR * smss) >> CUBIC_SHIFT_4) + wmax; - //printf("final cwnd: %lld\n", cwnd); - - //end = rdtsc(); + /* printf("final cwnd: %lld\n", cwnd); */ - //printf("%lld TSC ticks\n", end - start); + /* + * end = rdtsc(); + * printf("%lld TSC ticks\n", end - start); + */ return ((u_long)cwnd); } @@ -191,7 +189,7 @@ u_long reno_cwnd(u_long ticks_since_cong * Simplified form of equation 4 from I-D * For reno, beta = 0.5, therefore W_tcp(t) = wmax*0.5 + t/RTT * Equation 4 deals with cwnd/wmax in pkts, so because our cwnd is - * in bytes, we have to multiply by smss + * in bytes, we have to multiply by smss. */ return (((wmax * RENO_BETA) + (((ticks_since_cong * smss) << CUBIC_SHIFT) / rtt_ticks)) >> CUBIC_SHIFT);