Date: Thu, 16 Dec 2004 13:06:08 -0600 From: Dan Nelson <dnelson@allantgroup.com> To: FreeBSD-gnats-submit@FreeBSD.org, freebsd-bugs@FreeBSD.org Subject: Re: kern/75122: [PATCH] Incorrect inflight bandwidth calculation on first packet Message-ID: <20041216190608.GA21382@dan.emsphone.com> In-Reply-To: <200412151830.iBFIUVwE052799@freefall.freebsd.org> References: <200412151827.iBFIRqDB019997@dan.emsphone.com> <200412151830.iBFIUVwE052799@freefall.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
Updated patch including Matt's recommended fix: Index: tcp_hostcache.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/tcp_hostcache.c,v retrieving revision 1.7 diff -u -p -r1.7 tcp_hostcache.c --- tcp_hostcache.c 16 Aug 2004 18:32:07 -0000 1.7 +++ tcp_hostcache.c 15 Dec 2004 17:42:02 -0000 @@ -175,7 +175,7 @@ SYSCTL_INT(_net_inet_tcp_hostcache, OID_ &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run"); SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list, - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0, + CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_tcp_hc_list, "A", "List of all hostcache entries"); @@ -676,7 +676,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), msec(hc_entry->rmx_rttvar * (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), - hc_entry->rmx_bandwidth * 8, + hc_entry->rmx_bandwidth, hc_entry->rmx_cwnd, hc_entry->rmx_sendpipe, hc_entry->rmx_recvpipe, Index: tcp_subr.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.201.2.4 diff -u -p -r1.201.2.4 tcp_subr.c --- tcp_subr.c 1 Dec 2004 05:37:28 -0000 1.201.2.4 +++ tcp_subr.c 16 Dec 2004 06:38:48 -0000 @@ -631,7 +631,6 @@ tcp_newtcpcb(inp) tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; - tp->t_bw_rtttime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, @@ -1907,9 +1906,10 @@ tcp_twrespond(struct tcptw *tw, int flag void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq) { - u_long bw; + u_long bw, avgbw; u_long bwnd; int save_ticks; + int delta_ticks; /* * If inflight_enable is disabled in the middle of a tcp connection, @@ -1922,29 +1922,42 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t } /* + * Validate the delta time. If a connection is new or has been idle + * a long time we have to reset the bandwidth calculator. + */ + save_ticks = ticks; + delta_ticks = save_ticks - tp->t_bw_rtttime; + if (tp->t_bw_rtttime == 0 || delta_ticks < 0 || delta_ticks > hz * 10) { + tp->t_bw_rtttime = ticks; + tp->t_bw_rtseq = ack_seq; + if (tp->snd_bandwidth == 0) + tp->snd_bandwidth = tcp_inflight_min; + return; + } + if (delta_ticks == 0) + return; + + /* + * Sanity check, plus ignore pure window update acks. + */ + if ((int)(ack_seq - tp->t_bw_rtseq) <= 0) + return; + + /* * Figure out the bandwidth. Due to the tick granularity this * is a very rough number and it MUST be averaged over a fairly * long period of time. XXX we need to take into account a link * that is not using all available bandwidth, but for now our * slop will ramp us up if this case occurs and the bandwidth later * increases. - * - * Note: if ticks rollover 'bw' may wind up negative. We must - * effectively reset t_bw_rtttime for this case. */ - save_ticks = ticks; - if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1) - return; - bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / - (save_ticks - tp->t_bw_rtttime); + bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / delta_ticks; + tp->t_bw_rtttime = save_ticks; tp->t_bw_rtseq = ack_seq; - if (tp->t_bw_rtttime == 0 || (int)bw < 0) - return; - bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4; - tp->snd_bandwidth = bw; + avgbw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4; /* * Calculate the semi-static bandwidth delay product, plus two maximal @@ -1975,22 +1988,25 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t * no other choice. */ #define USERTT ((tp->t_srtt + tp->t_rttbest) / 2) - bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10; + bwnd = (int64_t)avgbw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10; #undef USERTT if (tcp_inflight_debug > 0) { static int ltime; if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) { ltime = ticks; - printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n", + printf("%p bw %lu(%lu,%lu) rttbest %d srtt %d bwnd %ld\n", tp, - bw, + avgbw, tp->snd_bandwidth, bw, tp->t_rttbest, tp->t_srtt, bwnd ); } } + + tp->snd_bandwidth = avgbw; + if ((long)bwnd < tcp_inflight_min) bwnd = tcp_inflight_min; if (bwnd > tcp_inflight_max) Index: tcp_usrreq.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.107 diff -u -p -r1.107 tcp_usrreq.c --- tcp_usrreq.c 16 Aug 2004 18:32:07 -0000 1.107 +++ tcp_usrreq.c 16 Dec 2004 06:38:59 -0000 @@ -864,7 +864,6 @@ tcp_connect(tp, nam, td) tp->t_state = TCPS_SYN_SENT; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tp->iss = tcp_new_isn(tp); - tp->t_bw_rtseq = tp->iss; tcp_sendseqinit(tp); /* @@ -959,7 +958,6 @@ tcp6_connect(tp, nam, td) tp->t_state = TCPS_SYN_SENT; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tp->iss = tcp_new_isn(tp); - tp->t_bw_rtseq = tp->iss; tcp_sendseqinit(tp); /* -- Dan Nelson dnelson@allantgroup.com
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20041216190608.GA21382>