Date: Tue, 5 May 2009 07:32:35 GMT From: Andre Oppermann <andre@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 161598 for review Message-ID: <200905050732.n457WZ7p003818@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=161598 Change 161598 by andre@andre_t61 on 2009/05/05 07:32:16 Various WIP. Affected files ... .. //depot/projects/tcp_new/netinet/tcp_output.c#7 edit Differences ... ==== //depot/projects/tcp_new/netinet/tcp_output.c#7 (text+ko) ==== @@ -200,7 +200,7 @@ * * duna = unacknowledged data in flight * swnd = remaining space in send window as advertised by remote end - * cwnd = congestion window, remaing amount of data that can be unacknowledged in flight + * cwnd = congestion window, remaing amount of data that can be in flight unacknowledged * dlen = remaing amount of data in send buffer available for sending * len = amount of data we have *and* can send righ now * @@ -218,14 +218,25 @@ dlen = so->so_snd.sb_cc - duna; len = min(dlen, min(swnd, cwnd)); + /* + * XXXAO: todo token bucket, mss sized + * Retransmits should not fall under pacing limit + * and neither ACKs, window updates, etc. if there + * is no data pending. + */ if (len > 0 && (tp->t_flags & TF_PACE)) { - len = tcp_snd_pace(tp, len); /* XXXAO: todo token bucket, mss sized */ + len = tcp_snd_pace(tp, len); if (len == 0) return (0); /* next token is pending */ } inflight = duna - tp->snd_sacked; + switch (tp->t_phase) { + case TP_IDLE: + break; + } + /* * Send out a SYN immediatly. */ @@ -248,7 +259,7 @@ * if the window is big enough. Do not care about nagle * and others. Otherwise things will go their normal way. */ - if (len > 0) + if (len <= dlen) goto send; } @@ -259,6 +270,7 @@ goto send; if (SEQ_LT(tp->snd_lastack, tp->snd_nxt) && !(tp->t_flags & TF_DELACK)) goto send; + /* * For a duplicate ACK to be acceptable it must not carry any * data nor update the window. This is a serious problem for @@ -268,9 +280,9 @@ * a duplicate ACK (if the ack value didn't move forward). The * question whether the other implementations see it the same way. */ - if ((tp->t_flags & TF_DUPACK) && tp->snd_dupack > 0 && - (tp->t_flags & TF_SACK_PERMIT)) { - len = 0; + if ((tp->t_flags & TF_DUPACK) && tp->snd_dupack > 0) { + if (!(tp->t_flags & TF_SACK_PERMIT)) + len = 0; goto send; } @@ -431,81 +443,13 @@ return (0); send: - SOCKBUF_LOCK(&so->so_snd); + tcp_options(tp, &to, flags); + return (tcp_send(tp, &to, flags)); +} - /* - * Compute options for segment. - * We only have to care about SYN and established connection - * segments. Options for SYN-ACK segments are handled in TCP - * syncache. Before ESTABLISHED, force sending of initial options - * unless TCP set not to do any options. - */ - to.to_flags = 0; - if (!(tp->t_flags & TF_NOOPT)) { - /* - * Maximum segment size. - */ - if (flags & TH_SYN) { - to.to_mss = tcp_mss(&inp->inp_inc, 0, 0); - to.to_flags |= TOF_MSS; - } - - /* - * Window scaling. - */ - if ((flags & TH_SYN) && (tp->t_flags & TF_WINSCALE)) { - to.to_wscale = tp->rcv_scale; - to.to_flags |= TOF_SCALE; - } - - /* - * Timestamps. - */ - if (tp->t_flags & TF_TIMESTAMP) { - to.to_tsval = ticks + tp->ts_offset; - to.to_tsecr = tp->ts_recent; - to.to_flags |= TOF_TS; - /* - * Set receive buffer autosizing timestamp. - */ - if (tp->rfbuf_ts == 0 && - (so->so_rcv.sb_flags & SB_AUTOSIZE)) - tp->rfbuf_ts = ticks; - } - - /* - * Selective ACK's. - */ - if (tp->t_flags & TF_SACK_PERMIT) { - if (flags & TH_SYN) - to.to_flags |= TOF_SACKPERM; - else if (TCPS_HAVEESTABLISHED(tp->t_state) && - tp->rcv_numsacks > 0) { - to.to_flags |= TOF_SACK; - to.to_nsacks = tp->rcv_numsacks; - to.to_sacks = (u_char *)tp->sackblks; - } - } - - /* - * TCP-MD5 (RFC2385). - */ -#ifdef TCP_SIGNATURE -#ifdef INET6 - if (!isipv6 && (tp->t_flags & TF_SIGNATURE)) { -#else - if (tp->t_flags & TF_SIGNATURE) { -#endif /* INET6 */ - to.to_flags |= TOF_SIGNATURE; - } -#endif /* TCP_SIGNATURE */ - - /* - * Processing the options. - */ - optlen = tcp_addoptions(&to, (u_char *)&opt); - } else - optlen = 0; +int +tcp_send(struct tcpcb *tp, struct tcpopt *to, int flags) +{ /* * Be careful not to send data and/or FIN on SYN segments. @@ -535,6 +479,7 @@ else th->th_win = (u_short)(rwin >> tp->rcv_scale); + SOCKBUF_LOCK(&so->so_snd); /* * Fill in fields. */ @@ -547,7 +492,7 @@ } /* - * If resending a SYN or FIN, be sure not to use a new sequence number. + * If resending a SYN or FIN, be sure NOT to use a new sequence number. */ if ((flags & TH_SYN) && (tp->t_flags & TF_SENTSYN)) th->th_seq--; @@ -598,6 +543,18 @@ } /* + * Update last send timestamp. + */ + if ((tp->t_flags & TF_TIMESTAMP) && tp->snd_tslast != to->to_tsval) + tp->snd_tslast = to->to_tsval; + + /* + * Last ACK we sent. + */ + if (tp->snd_lastack != th->th_ack) + tp->snd_lastack = th->th_ack; + + /* * Any pending ACK has been sent. * Clear related flags and disarm the delayed ACK timer. */ @@ -808,6 +765,7 @@ int off, int *olen, int optlen) { int len, slen, hdrs, hdrlen, linkhdr, optlen, ipoptlen; + int error = 0; struct tcphdr *th; struct ip *ip; struct inpcb *inp = tp->t_inpcb; @@ -997,18 +955,12 @@ ("%s: data beyond FIN", __func__); /* - * Set the PUSH bit to indicate that we have reached - * the end of the send buffer. - */ - if (off + slen == so->so_snd.sb_cc) { - th->th_flags =| TH_PSH; - } - - /* * If we're sending everything we've got, set PUSH. * This will keep happy those implementations which * only give data to the user when a buffer fills or * a PUSH comes in. + * Set the PUSH bit to indicate that we have reached + * the end of the send buffer. */ if (slen > 0 && off + slen == so->so_snd.sb_cc) th->th_flags |= TH_PUSH; @@ -1201,6 +1153,84 @@ return; } +void +tcp_options(struct tcpcb *tp, struct tcpopt *to, int flags) +{ + /* + * Compute options for segment. + * We only have to care about SYN and established connection + * segments. Options for SYN-ACK segments are handled in TCP + * syncache. Before ESTABLISHED, force sending of initial options + * unless TCP set not to do any options. + */ + to.to_flags = 0; + if (!(tp->t_flags & TF_NOOPT)) { + /* + * Maximum segment size. + */ + if (flags & TH_SYN) { + to.to_mss = tcp_mss(&inp->inp_inc, 0, 0); + to.to_flags |= TOF_MSS; + } + + /* + * Window scaling. + */ + if ((flags & TH_SYN) && (tp->t_flags & TF_WINSCALE)) { + to.to_wscale = tp->rcv_scale; + to.to_flags |= TOF_SCALE; + } + + /* + * Timestamps. + */ + if (tp->t_flags & TF_TIMESTAMP) { + to.to_tsval = ticks + tp->ts_offset; + to.to_tsecr = tp->ts_recent; + to.to_flags |= TOF_TS; + /* + * Set receive buffer autosizing timestamp. + */ + if (tp->rfbuf_ts == 0 && + (so->so_rcv.sb_flags & SB_AUTOSIZE)) + tp->rfbuf_ts = ticks; + } + + /* + * Selective ACK's. + */ + if (tp->t_flags & TF_SACK_PERMIT) { + if (flags & TH_SYN) + to.to_flags |= TOF_SACKPERM; + else if (TCPS_HAVEESTABLISHED(tp->t_state) && + tp->rcv_numsacks > 0) { + to.to_flags |= TOF_SACK; + to.to_nsacks = tp->rcv_numsacks; + to.to_sacks = (u_char *)tp->sackblks; + } + } + + /* + * TCP-MD5 (RFC2385). + */ +#ifdef TCP_SIGNATURE +#ifdef INET6 + if (!isipv6 && (tp->t_flags & TF_SIGNATURE)) { +#else + if (tp->t_flags & TF_SIGNATURE) { +#endif /* INET6 */ + to.to_flags |= TOF_SIGNATURE; + } +#endif /* TCP_SIGNATURE */ + + /* + * Processing the options. + */ + optlen = tcp_addoptions(&to, (u_char *)&opt); + } else + optlen = 0; +} + static void tcp_snd_autoscale(struct tcpcb *tp, int swnd) {
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905050732.n457WZ7p003818>