From owner-p4-projects@FreeBSD.ORG Thu Jul 1 07:24:21 2010 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 12027106567D; Thu, 1 Jul 2010 07:24:21 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id C9F74106564A for ; Thu, 1 Jul 2010 07:24:20 +0000 (UTC) (envelope-from andre@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id B7DD78FC1A for ; Thu, 1 Jul 2010 07:24:20 +0000 (UTC) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id o617OKKm063450 for ; Thu, 1 Jul 2010 07:24:20 GMT (envelope-from andre@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id o617OKa3063448 for perforce@freebsd.org; Thu, 1 Jul 2010 07:24:20 GMT (envelope-from andre@freebsd.org) Date: Thu, 1 Jul 2010 07:24:20 GMT Message-Id: <201007010724.o617OKa3063448@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to andre@freebsd.org using -f From: Andre Oppermann To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 180368 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 01 Jul 2010 07:24:21 -0000 http://p4web.freebsd.org/@@180368?ac=10 Change 180368 by andre@andre_t61 on 2010/07/01 07:24:10 Adjust for new reassembly queue. Affected files ... .. //depot/projects/tcp_new/netinet/tcp_input.c#17 edit .. //depot/projects/tcp_new/netinet/tcp_reass.c#4 edit .. //depot/projects/tcp_new/netinet/tcp_var.h#16 edit Differences ... ==== //depot/projects/tcp_new/netinet/tcp_input.c#17 (text+ko) ==== @@ -1783,6 +1783,8 @@ */ if (tp->t_phase < TP_LOSSRECOV) { //tcp_cc_ack(tp, th, tiwin, acked, tlen, sacked); + } else { + tcp_do_lossrecovery(tp); } KASSERT(tp->snd_cwnd > tp->snd_mss, @@ -1925,7 +1927,7 @@ * If the segment doesn't carry any data, urgent data * or FIN we are done. */ - if (tlen == 0 && (th->th_flags & TH_FIN) == 0 && + if (tlen == 0 && (thflags & TH_FIN) == 0 && !TCPS_HAVERCVDFIN(tp->t_state)) { INP_INFO_WUNLOCK(&tcbinfo); m_freem(m); @@ -1961,13 +1963,16 @@ * segment with urgent that got pulled and now is zero */ if (!TCPS_HAVERCVDFIN(tp->t_state) && - (tlen > 0 || (!TAILQ_EMPTY(&tp->rcv_trq) && (th->th_flags & TH_FIN)))) { + (tlen > 0 || (!TAILQ_EMPTY(&tp->rcv_trq) && (thflags & TH_FIN)))) { int newsize = 0; /* Rcvbuf autoscaling. */ + th_seq = th->th_seq; + th = NULL; /* Safety */ /* * Delayed header drop of IPv[46] and TCP headers. */ m_adj(m, drop_hdrlen); +#if 0 if (m->m_next != NULL) { /* * XXXAO: m_adj doesn't drop mbufs from the front, @@ -1986,6 +1991,7 @@ ("%s: drop_hdrlen too big or mbuf chain broken", __func__)); } +#endif KASSERT(tlen == m_length(m, NULL), ("%s: tlen != mbuf chain length", __func__)); @@ -1996,23 +2002,18 @@ * will go there as well. Whenever a segment or a row of * segments can be reassembled and the left edge exactly * matches, it is returned together for further processing. - * The FIN flag is stored in the mbuf header as M_PROTO1. * Send a forced ACK for every segment we receive when * we are doing reassembly to immediately inform the sender * of missing segments and to trigger fast retransmits. * RFC793: section 3.9, page 70 * RFC1122: section 4.2.2.21 */ - if (th->th_seq != tp->rcv_nxt || !TAILQ_EMPTY(&tp->rcv_trq)) { - //m = tcp_reass(tp, m, tlen, thflags); - m = NULL; + if (th->th_seq != tp->rcv_nxt || !RB_EMPTY(&tp->rcv_trq)) { + m = tcp_reass(tp, m, &tlen, th_seq, &thflags); + tp->t_flags |= TF_ACKNOW; - if (m != NULL && (m->m_flags & M_PROTO1)) - thflags |= TH_FIN; - else if (m == NULL) - thflags &= ~TH_FIN; - - tp->t_flags |= TF_ACKNOW; + KASSERT(m == NULL || tlen == m_length(m, NULL), + ("%s: tlen != mbuf chain length", __func__)); } /* @@ -3031,6 +3032,15 @@ } /* + * Perform loss recovery. + */ +void +tcp_do_lossrecovery(struct tcpcb *tp) +{ + +} + +/* * Process urgent data in TCP segments. * * XXXAO: ==== //depot/projects/tcp_new/netinet/tcp_reass.c#4 (text+ko) ==== @@ -101,7 +101,6 @@ * TODO: * - D-SACK when only one SACK slot available? * - Direct pointer to highest seqnum block in RB-tree? - * - Remove T/TCP gonk. * - Lots of testing. */ @@ -319,11 +318,10 @@ * NB: We must always consume the mbuf. Either by appeding it to * the queue or by freeing it. */ -int -tcp_reass(struct tcpcb *tp, struct tcphdr *th, struct mbuf *m, int len, int hlen) +struct *mbuf +tcp_reass(struct tcpcb *tp, struct mbuf *m, int *tlen, tcp_seq th_seq, int *thflags) { - int thflags = 0; - tcp_seq th_seq; + int len = *tlen; struct socket *so = tp->t_inpcb->inp_socket; struct tcp_reass_block *trb = NULL, *trbn; struct tcp_reass_block trbs; @@ -336,37 +334,10 @@ if (!tcp_reass_enable && RB_EMPTY(&tp->rcv_reass)) goto done; - /* - * Call with th==NULL after becoming established to - * force pre-ESTABLISHED data up to user socket. - * XXX: Was used for T/TCP of which code remains. - */ - if (th == NULL) { - if (!TCPS_HAVEESTABLISHED(tp->t_state) || - RB_EMPTY(&tp->rcv_reass) || - ((trb = RB_MIN(tcp_ra, &tp->rcv_reass)) && - trb->trb_seqs != tp->rcv_nxt)) - return (0); - trb = RB_MIN(tcp_ra, &tp->rcv_reass); - goto present; - } - KASSERT(th != NULL, ("%s: th is NULL", __func__)); KASSERT(m != NULL, ("%s: m is NULL", __func__)); - KASSERT(len + hlen == m_length(m, NULL), - ("%s: len + hlen != mbuf length", __func__)); - KASSERT(hlen <= m_length(m, NULL), - ("%s: hlen > mbuf length", __func__)); - - /* - * Store TCP header information in local variables as - * we may lose access to it after header dropping and - * mbuf compacting. - */ - thflags = th->th_flags; - th_seq = th->th_seq; - th = NULL; /* Prevent further use. */ - + KASSERT(len == m_length(m, NULL), + ("%s: len != mbuf length", __func__)); KASSERT(SEQ_GEQ(th_seq, tp->rcv_nxt), ("%s: sequence number below rcv_nxt", __func__)); KASSERT(!(tp->rcv_nxt == th_seq) || !(RB_EMPTY(&tp->rcv_reass)), @@ -416,16 +387,16 @@ * if it was genuine. * This approach is based on a discussion on TCPM mailing list. */ - if ((thflags & TH_FIN) && tp->rcv_nxt == th_seq) { + if ((*thflags & TH_FIN) && tp->rcv_nxt == th_seq) { tcp_reass_flush(tp); if (m->m_len == 0) { tcp_timer_activate(tp, TT_REASS, 0); - return (thflags); + goto done; } } else if (len == 0) goto done; else - thflags &= ~TH_FIN; + *thflags &= ~TH_FIN; /* Statistics. */ if (tp->rcv_nxt != th_seq) { @@ -433,13 +404,7 @@ TCPSTAT_ADD(tcps_rcvoobyte, len); } - /* - * Remove and free packet header and mtags. - * Trim empty mbufs from head of chain. - * Compact the mbuf chain. - */ - m_demote(m, 1); - m_adj(m, hlen); + /* Compact the mbuf chain. */ m = m_trimhead(m); if (tcp_reass_spacetime && m->m_next != NULL) m = m_collapse(m, M_DONTWAIT, 1024); @@ -451,6 +416,7 @@ trbs.trb_seqe = th_seq + len; trbs.trb_m = m; trbs.trb_mt = m_last(m); + m = NULL; /* * Find a block that has at least partial overlap to either side. @@ -553,7 +519,7 @@ if (trb->trb_seqs == tp->rcv_nxt) goto present; - return (0); + return (m); present: /* @@ -567,26 +533,17 @@ TCPSTAT_INC(tcps_reass_missingseg); - SOCKBUF_LOCK(&so->so_rcv); /* - * We can only ever dequeue one consecutive - * block of data at most. + * We can only ever dequeue one consecutive block of data at most. */ - if (!(so->so_rcv.sb_state & SBS_CANTRCVMORE)) { - sbappendstream_locked(&so->so_rcv, trb->trb_m); - tp->rcv_nxt += SEQ_DELTA(trb->trb_seqs, trb->trb_seqe); - trb->trb_m = NULL; - trb->trb_mt = NULL; - } + m = trb->trb_m; + *tlen = SEQ_DELTA(trb->trb_seqs, trb->trb_seqe); + trb->trb_m = NULL; + trb->trb_mt = NULL; - if (trb == &trbs) - m_freem(trb->trb_m); /* NB: trb_m can be =! NULL */ - else + if (trb != &trbs) tcp_reass_free(tp, trb); - /* NB: sorwakeup_locked() does an implicit socket buffer unlock. */ - sorwakeup_locked(so); - /* * Don't hold on to data in the reassembly queue for too long. * Kernel memory is limited and if the connection doesn't make @@ -604,11 +561,11 @@ tcp_timer_activate(tp, TT_REASS, 0); ND6_HINT(tp); - return (thflags); + return (m); done: m_freem(m); - return (0); + return (m); } /* ==== //depot/projects/tcp_new/netinet/tcp_var.h#16 (text+ko) ==== @@ -71,28 +71,14 @@ #endif /* TCP reassembly queue segment entry. */ -/* XXXAO: new one */ -struct trq { - TAILQ_ENTRY(trq) trq_q; - tcp_seq trq_seq; /* start of segment */ - int trq_len; /* length of segment */ - int trq_segs; /* number of mbufs */ - int trq_flags; /* flags for segment chain */ -#define TRQ_FIN 0x01 /* FIN was on last segment */ - struct mbuf *trq_m; /* mbuf chain of data */ - struct mbuf *trq_ml; /* last mbuf in chain of data */ +struct tcp_reass_block { + RB_ENTRY(tcp_reass_block) trb_rb; + LIST_ENTRY(tcp_reass_block) trb_sack; /* linked list in SACK order */ + tcp_seq trb_seqs; /* start of block */ + tcp_seq trb_seqe; /* end of block */ + struct mbuf *trb_m; /* mbuf chain of data */ + struct mbuf *trb_mt; /* last mbuf in chain of data */ }; -TAILQ_HEAD(trq_head, trq); - -/* XXXAO: old one */ -struct tseg_qent { - LIST_ENTRY(tseg_qent) tqe_q; - int tqe_len; /* TCP segment data length */ - struct tcphdr *tqe_th; /* a pointer to tcp header */ - struct mbuf *tqe_m; /* mbuf contains packet */ -}; -LIST_HEAD(tsegqe_head, tseg_qent); - extern int tcp_reass_qsize; extern struct uma_zone *tcp_reass_zone; @@ -100,17 +86,11 @@ tcp_seq start; /* left */ tcp_seq end; /* right */ }; - struct tcp_sack_block { RB_ENTRY(tcp_sack_block) tsb_rb; /* scoreboard linkage */ struct sackblk tsb_blk; }; -struct sackhint { - struct sackhole *nexthole; - int sack_bytes_rexmit; -}; - struct tcptemp { u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ struct tcphdr tt_t; @@ -223,9 +203,12 @@ u_long tsval_recent; /* highest ts reflected back to us */ u_long tsval_age; /* when reflected last updated */ - /* old reass */ - struct tsegqe_head t_segq; /* segment reassembly queue */ - int t_segqlen; /* segment reassembly queue length */ + /* reassembly */ + RB_HEAD(tcp_ra, tcp_reass_block) rcv_reass; /* segment reassembly queue */ + int rcv_reass_size; /* segment reassembly memory usage */ + int rcv_reass_blocks; /* blocks in reassembly queue */ + LIST_HEAD(tcp_ras, tcp_reass_block) rcv_reass_sack; /* last additions to reass queue */ + struct sackblk rcv_reass_dsack; /* DSACK block */ /* automatic socket buffer scaling */ tcp_ts rfbuf_ts; /* recv buffer autoscaling timestamp */ @@ -596,9 +579,11 @@ void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *, const void *); -int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *); +struct mbuf + *tcp_reass(struct tcpcb *, struct mbuf *, int*, tcp_seq, int*); void tcp_reass_init(void); -void tcp_reass_qfree(struct tcpcb *); +int tcp_reass_sack(struct tcpcb *, u_char *, int); +void tcp_reass_flush(struct tcpcb *); void tcp_input(struct mbuf *, int); u_long tcp_maxmtu(struct in_conninfo *, int *); u_long tcp_maxmtu6(struct in_conninfo *, int *);