From owner-svn-src-stable@FreeBSD.ORG Fri Feb 10 20:19:39 2012 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 68F271065675; Fri, 10 Feb 2012 20:19:39 +0000 (UTC) (envelope-from tuexen@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 51AFA8FC15; Fri, 10 Feb 2012 20:19:39 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q1AKJd0v092354; Fri, 10 Feb 2012 20:19:39 GMT (envelope-from tuexen@svn.freebsd.org) Received: (from tuexen@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q1AKJdMJ092347; Fri, 10 Feb 2012 20:19:39 GMT (envelope-from tuexen@svn.freebsd.org) Message-Id: <201202102019.q1AKJdMJ092347@svn.freebsd.org> From: Michael Tuexen Date: Fri, 10 Feb 2012 20:19:39 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r231424 - stable/8/sys/netinet X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 10 Feb 2012 20:19:39 -0000 Author: tuexen Date: Fri Feb 10 20:19:38 2012 New Revision: 231424 URL: http://svn.freebsd.org/changeset/base/231424 Log: MFC r219120: Adds a new Congestion Control that helps reduce the RTT that a flow will build up in buffers in transit. It is a slight modification to RFC2581 but is more friendly i.e. less aggressive. From rrs@. 
Modified: stable/8/sys/netinet/sctp.h stable/8/sys/netinet/sctp_cc_functions.c stable/8/sys/netinet/sctp_constants.h stable/8/sys/netinet/sctp_structs.h stable/8/sys/netinet/sctp_sysctl.h stable/8/sys/netinet/sctp_usrreq.c Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/boot/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/e1000/ (props changed) Modified: stable/8/sys/netinet/sctp.h ============================================================================== --- stable/8/sys/netinet/sctp.h Fri Feb 10 20:17:28 2012 (r231423) +++ stable/8/sys/netinet/sctp.h Fri Feb 10 20:19:38 2012 (r231424) @@ -258,6 +258,11 @@ struct sctp_paramhdr { #define SCTP_CC_HSTCP 0x00000001 /* HTCP Congestion Control */ #define SCTP_CC_HTCP 0x00000002 +/* RTCC Congestion Control - RFC2581 plus */ +#define SCTP_CC_RTCC 0x00000003 + +#define SCTP_CC_OPT_RTCC_SETMODE 0x00002000 +#define SCTP_CC_OPT_USE_DCCC_ECN 0x00002001 /* RS - Supported stream scheduling modules for pluggable * stream scheduling Modified: stable/8/sys/netinet/sctp_cc_functions.c ============================================================================== --- stable/8/sys/netinet/sctp_cc_functions.c Fri Feb 10 20:17:28 2012 (r231423) +++ stable/8/sys/netinet/sctp_cc_functions.c Fri Feb 10 20:19:38 2012 (r231424) @@ -185,10 +185,203 @@ sctp_cwnd_update_after_fr(struct sctp_tc } } + +/* RTCC Algoritm to limit growth of cwnd, return + * true if you want to NOT allow cwnd growth + */ +static int +cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw) +{ + uint64_t bw_offset, rtt_offset, rtt, vtag, probepoint; + + /*- + * Here we need to see if we want + * to limit cwnd growth due to increase + * in overall rtt but no increase in bw. + * We use the following table to figure + * out what we should do. 
When we return + * 0, cc update goes on as planned. If we + * return 1, then no cc update happens and cwnd + * stays where it is at. + * ---------------------------------- + * BW | RTT | Action + * ********************************* + * INC | INC | return 0 + * ---------------------------------- + * INC | SAME | return 0 + * ---------------------------------- + * INC | DECR | return 0 + * ---------------------------------- + * SAME | INC | return 1 + * ---------------------------------- + * SAME | SAME | return 1 + * ---------------------------------- + * SAME | DECR | return 0 + * ---------------------------------- + * DECR | INC | return 0 or 1 based on if we caused. + * ---------------------------------- + * DECR | SAME | return 0 + * ---------------------------------- + * DECR | DECR | return 0 + * ---------------------------------- + * + * We are a bit fuzz on what an increase or + * decrease is. For BW it is the same if + * it did not change within 1/64th. For + * RTT it stayed the same if it did not + * change within 1/32nd + */ + rtt = stcb->asoc.my_vtag; + vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport); + probepoint = (((uint64_t) net->cwnd) << 32); + rtt = net->rtt; + bw_offset = net->cc_mod.rtcc.lbw >> SCTP_BASE_SYSCTL(sctp_rttvar_bw); + if (nbw > net->cc_mod.rtcc.lbw + bw_offset) { + /* + * BW increased, so update and return 0, since all actions + * in our table say to do the normal CC update + */ + /* PROBE POINT 0 */ + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + return (0); + } + rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt); + if (nbw < net->cc_mod.rtcc.lbw - bw_offset) { + /* Bandwidth decreased. 
*/ + if (rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) { + /* rtt increased */ + /* Did we add more */ + if (net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) { + /* We caused it maybe.. back off */ + /* PROBE POINT 1 */ + probepoint |= ((1 << 16) | 1); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = rtt; + net->cwnd = net->cc_mod.rtcc.cwnd_at_bw_set; + if (net->cc_mod.rtcc.ret_from_eq) { + /* + * Switch over to CA if we are less + * aggressive + */ + net->ssthresh = net->cwnd - 1; + net->partial_bytes_acked = 0; + } + return (1); + } + /* Probe point 2 */ + probepoint |= ((2 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + + /* Someone else - fight for more? */ + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + return (0); + } else if (rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) { + /* rtt decreased */ + /* Probe point 3 */ + probepoint |= ((3 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + return (0); + } + /* The bw decreased but rtt stayed the same */ + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + /* Probe point 4 */ + probepoint |= ((4 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + return (0); + } + /* + * If we reach here then we are in a situation where the bw stayed + * the same. + */ + if (rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) { + /* + * rtt increased we don't update bw.. 
so we don't update the + * rtt either. + */ + /* Probe point 5 */ + probepoint |= ((5 << 16) | 1); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + return (1); + } + if (rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) { + /* + * rtt decreased, there could be more room. we update both + * the bw and the rtt here. + */ + /* Probe point 6 */ + probepoint |= ((6 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = rtt; + net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd; + return (0); + } + /* + * Ok bw and rtt remained the same .. no update to any but save the + * latest cwnd. + */ + /* Probe point 7 */ + probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | nbw), + net->cc_mod.rtcc.lbw_rtt, + rtt, + probepoint); + return ((int)net->cc_mod.rtcc.ret_from_eq); +} + static void -sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, +sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb, struct sctp_association *asoc, - int accum_moved, int reneged_all, int will_exit) + int accum_moved, int reneged_all, int will_exit, int use_rtcc) { struct sctp_nets *net; int old_cwnd; @@ -327,6 +520,48 @@ sctp_cwnd_update_after_sack(struct sctp_ goto skip_cwnd_update; } /* + * Did any measurements go on for this network? + */ + if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) { + uint64_t nbw; + + /* + * At this point our bw_bytes has been updated by + * incoming sack information. + * + * But our bw may not yet be set. 
+ * + */ + if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) { + nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000); + } else { + nbw = net->cc_mod.rtcc.bw_bytes; + } + if (net->cc_mod.rtcc.lbw) { + if (cc_bw_limit(stcb, net, nbw)) { + /* Hold here, no update */ + goto skip_cwnd_update; + } + } else { + uint64_t vtag, probepoint; + + probepoint = (((uint64_t) net->cwnd) << 32); + probepoint |= ((0xa << 16) | 0); + vtag = (net->rtt << 32) | + (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + nbw, + 0, + net->rtt, + probepoint); + net->cc_mod.rtcc.lbw = nbw; + net->cc_mod.rtcc.lbw_rtt = net->rtt; + } + } + /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ @@ -481,31 +716,67 @@ sctp_cwnd_update_after_timeout(struct sc } } - static void -sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, - int in_window, int num_pkt_lost) +sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost, int use_rtcc) { int old_cwnd = net->cwnd; - if (in_window == 0) { - SCTP_STAT_INCR(sctps_ecnereducedcwnd); - net->ssthresh = net->cwnd / 2; - if (net->ssthresh < net->mtu) { - net->ssthresh = net->mtu; - /* here back off the timer as well, to slow us down */ - net->RTO <<= 1; + if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) { + /* Data center Congestion Control */ + if (in_window == 0) { + /* + * Go to CA with the cwnd at the point we sent the + * TSN that was marked with a CE. 
+ */ + if (net->ecn_prev_cwnd < net->cwnd) { + /* Restore to prev cwnd */ + net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost); + } else { + /* Just cut in 1/2 */ + net->cwnd /= 2; + } + /* Drop to CA */ + net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } + } else { + /* + * Further tuning down required over the drastic + * orginal cut + */ + net->ssthresh -= (net->mtu * num_pkt_lost); + net->cwnd -= (net->mtu * num_pkt_lost); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } } - net->cwnd = net->ssthresh; - SDT_PROBE(sctp, cwnd, net, ecn, - stcb->asoc.my_vtag, - ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), - net, - old_cwnd, net->cwnd); - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + } else { + if (in_window == 0) { + SCTP_STAT_INCR(sctps_ecnereducedcwnd); + net->ssthresh = net->cwnd / 2; + if (net->ssthresh < net->mtu) { + net->ssthresh = net->mtu; + /* + * here back off the timer as well, to slow + * us down + */ + net->RTO <<= 1; + } + net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, ecn, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); + } } } + } static void @@ -636,14 +907,16 @@ sctp_cwnd_update_after_output(struct sct if (net->ssthresh < net->cwnd) net->ssthresh = net->cwnd; - net->cwnd = (net->flight_size + (burst_limit * net->mtu)); - SDT_PROBE(sctp, cwnd, net, bl, - stcb->asoc.my_vtag, - ((stcb->sctp_ep->sctp_lport << 16) | 
(stcb->rport)), - net, - old_cwnd, net->cwnd); - if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { - sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); + if (burst_limit) { + net->cwnd = (net->flight_size + (burst_limit * net->mtu)); + SDT_PROBE(sctp, cwnd, net, bl, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { + sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); + } } } @@ -673,6 +946,212 @@ sctp_cwnd_update_after_fr_timer(struct s } } +static void +sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + /* Passing a zero argument in last disables the rtcc algoritm */ + sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0); +} + +static void +sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost) +{ + /* Passing a zero argument in last disables the rtcc algoritm */ + sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0); +} + +/* Here starts the RTCCVAR type CC invented by RRS which + * is a slight mod to RFC2581. We reuse a common routine or + * two since these algoritms are so close and need to + * remain the same. 
+ */ +static void +sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, + int in_window, int num_pkt_lost) +{ + sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1); +} + + +static +void +sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net, + struct sctp_tmit_chunk *tp1) +{ + net->cc_mod.rtcc.bw_bytes += tp1->send_size; +} + +static void +sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + if (net->cc_mod.rtcc.tls_needs_set > 0) { + /* We had a bw measurment going on */ + struct timeval ltls; + + SCTP_GETPTIME_TIMEVAL(&ltls); + timevalsub(&ltls, &net->cc_mod.rtcc.tls); + net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec; + } +} + +static void +sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + uint64_t vtag, probepoint; + + if (net->cc_mod.rtcc.lbw) { + /* Clear the old bw.. we went to 0 in-flight */ + vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + probepoint = (((uint64_t) net->cwnd) << 32); + /* Probe point 8 */ + probepoint |= ((8 << 16) | 0); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + ((net->cc_mod.rtcc.lbw << 32) | 0), + net->cc_mod.rtcc.lbw_rtt, + 0, + probepoint); + net->cc_mod.rtcc.lbw_rtt = 0; + net->cc_mod.rtcc.cwnd_at_bw_set = 0; + net->cc_mod.rtcc.lbw = 0; + net->cc_mod.rtcc.bw_tot_time = 0; + net->cc_mod.rtcc.bw_bytes = 0; + net->cc_mod.rtcc.tls_needs_set = 0; + if (net->cc_mod.rtcc.ret_from_eq) { + /* less aggressive one - reset cwnd too */ + uint32_t cwnd_in_mtu, cwnd; + + cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); + if (cwnd_in_mtu == 0) { + /* + * Using 0 means that the value of RFC 4960 + * is used. + */ + cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + } else { + /* + * We take the minimum of the burst limit + * and the initial congestion window. 
+ */ + if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst)) + cwnd_in_mtu = stcb->asoc.max_burst; + cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; + } + if (net->cwnd > cwnd) { + /* + * Only set if we are not a timeout (i.e. + * down to 1 mtu) + */ + net->cwnd = cwnd; + } + } + } +} + +static void +sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + uint64_t vtag, probepoint; + + sctp_set_initial_cc_param(stcb, net); + stcb->asoc.use_precise_time = 1; + probepoint = (((uint64_t) net->cwnd) << 32); + probepoint |= ((9 << 16) | 0); + vtag = (net->rtt << 32) | + (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | + (stcb->rport); + SDT_PROBE(sctp, cwnd, net, rttvar, + vtag, + 0, + 0, + 0, + probepoint); + net->cc_mod.rtcc.lbw_rtt = 0; + net->cc_mod.rtcc.cwnd_at_bw_set = 0; + net->cc_mod.rtcc.lbw = 0; + net->cc_mod.rtcc.bw_tot_time = 0; + net->cc_mod.rtcc.bw_bytes = 0; + net->cc_mod.rtcc.tls_needs_set = 0; + net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret); +} + +static int +sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget, + struct sctp_cc_option *cc_opt) +{ + struct sctp_nets *net; + + if (setorget == 1) { + /* a set */ + if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) { + if ((cc_opt->aid_value.assoc_value != 0) && + (cc_opt->aid_value.assoc_value != 1)) { + return (EINVAL); + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value; + } + } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) { + if ((cc_opt->aid_value.assoc_value != 0) && + (cc_opt->aid_value.assoc_value != 1)) { + return (EINVAL); + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value; + } + } else { + return (EINVAL); + } + } else { + /* a get */ + if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } 
+ cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq; + } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) { + net = TAILQ_FIRST(&stcb->asoc.nets); + if (net == NULL) { + return (EFAULT); + } + cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn; + } else { + return (EINVAL); + } + } + return (0); +} + +static void +sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb, + struct sctp_nets *net) +{ + if (net->cc_mod.rtcc.tls_needs_set == 0) { + SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls); + net->cc_mod.rtcc.tls_needs_set = 2; + } +} + +static void +sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb, + struct sctp_association *asoc, + int accum_moved, int reneged_all, int will_exit) +{ + /* Passing a one argument at the last enables the rtcc algoritm */ + sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1); +} + + +/* Here starts Sally Floyds HS-TCP */ + struct sctp_hs_raise_drop { int32_t cwnd; int32_t increase; @@ -1710,5 +2189,20 @@ struct sctp_cc_functions sctp_cc_functio .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, .sctp_cwnd_update_after_fr_timer = sctp_htcp_cwnd_update_after_fr_timer + }, + { + .sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param, + .sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack, + .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, + .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, + .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo, + .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, + .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, + .sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer, + .sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted, + .sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged, + 
.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins, + .sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack, + .sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option } }; Modified: stable/8/sys/netinet/sctp_constants.h ============================================================================== --- stable/8/sys/netinet/sctp_constants.h Fri Feb 10 20:17:28 2012 (r231423) +++ stable/8/sys/netinet/sctp_constants.h Fri Feb 10 20:19:38 2012 (r231424) @@ -965,10 +965,9 @@ __FBSDID("$FreeBSD$"); * local lan type rtt's */ /* - * We allow 500us for the rtt and another 500us for the cookie processing - * since we measure this on the first rtt. + * We allow 900us for the rtt. */ -#define SCTP_LOCAL_LAN_RTT 1100 +#define SCTP_LOCAL_LAN_RTT 900 #define SCTP_LAN_UNKNOWN 0 #define SCTP_LAN_LOCAL 1 #define SCTP_LAN_INTERNET 2 Modified: stable/8/sys/netinet/sctp_structs.h ============================================================================== --- stable/8/sys/netinet/sctp_structs.h Fri Feb 10 20:17:28 2012 (r231423) +++ stable/8/sys/netinet/sctp_structs.h Fri Feb 10 20:19:38 2012 (r231424) @@ -219,6 +219,21 @@ struct htcp { uint32_t lasttime; }; +struct rtcc_cc { + struct timeval tls; /* The time we started the sending */ + uint64_t lbw; /* Our last estimated bw */ + uint64_t lbw_rtt; /* RTT at bw estimate */ + uint64_t bw_bytes; /* The total bytes since this sending began */ + uint64_t bw_tot_time; /* The total time since sending began */ + uint64_t new_tot_time; /* temp holding the new value */ + uint32_t cwnd_at_bw_set; + uint8_t ret_from_eq; + uint8_t use_dccc_ecn; + uint8_t tls_needs_set; /* Flag to indicate we need to set tls 0 or 1 + * means set at send 2 not */ +}; + + struct sctp_nets { TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */ @@ -255,6 +270,7 @@ struct sctp_nets { struct timeval last_sent_time; union cc_control_data { struct htcp htcp_ca; /* JRS - struct used in HTCP algorithm */ + struct rtcc_cc rtcc; /* rtcc 
module cc stuff */ } cc_mod; int ref_count; Modified: stable/8/sys/netinet/sctp_sysctl.h ============================================================================== --- stable/8/sys/netinet/sctp_sysctl.h Fri Feb 10 20:17:28 2012 (r231423) +++ stable/8/sys/netinet/sctp_sysctl.h Fri Feb 10 20:19:38 2012 (r231424) @@ -104,11 +104,9 @@ struct sctp_sysctl { uint32_t sctp_mobility_base; uint32_t sctp_mobility_fasthandoff; uint32_t sctp_inits_include_nat_friendly; -#ifdef SCTP_HAS_RTTCC uint32_t sctp_rttvar_bw; uint32_t sctp_rttvar_rtt; uint32_t sctp_rttvar_eqret; -#endif #if defined(SCTP_LOCAL_TRACE_BUF) struct sctp_log sctp_log; #endif @@ -519,7 +517,6 @@ struct sctp_sysctl { #define SCTPCTL_INITIAL_CWND_MIN 0 #define SCTPCTL_INITIAL_CWND_MAX 0xffffffff #define SCTPCTL_INITIAL_CWND_DEFAULT 3 -#ifdef SCTP_HAS_RTTCC /* rttvar smooth avg for bw calc */ #define SCTPCTL_RTTVAR_BW_DESC "Shift amount for bw smothing on rtt calc" @@ -537,7 +534,6 @@ struct sctp_sysctl { #define SCTPCTL_RTTVAR_EQRET_MIN 0 #define SCTPCTL_RTTVAR_EQRET_MAX 1 #define SCTPCTL_RTTVAR_EQRET_DEFAULT 0 -#endif #if defined(SCTP_DEBUG) /* debug: Configure debug output */ Modified: stable/8/sys/netinet/sctp_usrreq.c ============================================================================== --- stable/8/sys/netinet/sctp_usrreq.c Fri Feb 10 20:17:28 2012 (r231423) +++ stable/8/sys/netinet/sctp_usrreq.c Fri Feb 10 20:19:38 2012 (r231424) @@ -1777,6 +1777,7 @@ flags_out: SCTP_TCB_UNLOCK(stcb); } } + break; /* RS - Get socket option for pluggable stream scheduling */ case SCTP_PLUGGABLE_SS: { @@ -2954,6 +2955,7 @@ sctp_setopt(struct socket *so, int optna case SCTP_CC_RFC2581: case SCTP_CC_HSTCP: case SCTP_CC_HTCP: + case SCTP_CC_RTCC: stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; stcb->asoc.congestion_control_module = av->assoc_value; if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { @@ -2973,6 +2975,7 @@ sctp_setopt(struct socket *so, int optna case SCTP_CC_RFC2581: case 
SCTP_CC_HSTCP: case SCTP_CC_HTCP: + case SCTP_CC_RTCC: SCTP_INP_WLOCK(inp); inp->sctp_ep.sctp_default_cc_module = av->assoc_value; SCTP_INP_WUNLOCK(inp); @@ -2984,6 +2987,7 @@ sctp_setopt(struct socket *so, int optna } } } + break; case SCTP_CC_OPTION: { struct sctp_cc_option *cc_opt;