From owner-p4-projects@FreeBSD.ORG Sun Feb 3 07:18:55 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id CFED016A419; Sun, 3 Feb 2008 07:18:54 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 957D016A417 for ; Sun, 3 Feb 2008 07:18:54 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 814A513C458 for ; Sun, 3 Feb 2008 07:18:54 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id m137IsHt041210 for ; Sun, 3 Feb 2008 07:18:54 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id m137IrmC041207 for perforce@freebsd.org; Sun, 3 Feb 2008 07:18:53 GMT (envelope-from kmacy@freebsd.org) Date: Sun, 3 Feb 2008 07:18:53 GMT Message-Id: <200802030718.m137IrmC041207@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 134697 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 03 Feb 2008 07:18:55 -0000 http://perforce.freebsd.org/chv.cgi?CH=134697 Change 134697 by kmacy@kmacy:storage:toehead on 2008/02/03 07:18:30 - fix socket buffer accounting - use sbappendstream Affected files ... .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#14 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#15 edit Differences ... ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#14 (text+ko) ==== @@ -586,12 +586,19 @@ int dack_mode, must_send, read; u32 thres, credits, dack = 0; + so = tp->t_inpcb->inp_socket; if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || - (tp->t_state == TCPS_FIN_WAIT_2))) + (tp->t_state == TCPS_FIN_WAIT_2))) { + if (copied) { + SOCKBUF_LOCK(&so->so_rcv); + toep->tp_copied_seq += copied; + SOCKBUF_UNLOCK(&so->so_rcv); + } + return; - INP_LOCK_ASSERT(tp->t_inpcb); + } - so = tp->t_inpcb->inp_socket; + INP_LOCK_ASSERT(tp->t_inpcb); SOCKBUF_LOCK(&so->so_rcv); if (copied) toep->tp_copied_seq += copied; @@ -1755,9 +1762,12 @@ "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u", m->m_seq, q->cur_buf, m->m_pkthdr.len); #endif - sbappend(&so->so_rcv, m); + SOCKBUF_LOCK(&so->so_rcv); + sbappendstream_locked(&so->so_rcv, m); if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup(so); + sorwakeup_locked(so); + else + SOCKBUF_UNLOCK(&so->so_rcv); } /* @@ -1895,11 +1905,12 @@ "new_rx_data: seq 0x%x len %u", m->m_seq, m->m_pkthdr.len); #endif + INP_UNLOCK(tp->t_inpcb); SOCKBUF_LOCK(&so->so_rcv); if (sb_notify(&so->so_rcv)) DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len); - sbappend_locked(&so->so_rcv, m); + sbappendstream_locked(&so->so_rcv, m); #ifdef notyet /* @@ -1912,7 +1923,7 @@ so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax)); #endif - INP_UNLOCK(tp->t_inpcb); + DPRINTF("sb_cc=%d sb_mbcnt=%d\n", so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt); @@ -2134,11 +2145,12 @@ tp->rcv_nxt += m->m_len; tp->t_rcvtime = ticks; + SOCKBUF_LOCK(&so->so_rcv); sbappendstream_locked(&so->so_rcv, m); if ((so->so_state & SS_NOFDREF) == 0) sorwakeup_locked(so); - + SOCKBUF_UNLOCK(&so->so_rcv); TRACE_EXIT; } @@ -2230,7 +2242,7 @@ if (!(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; tp->t_rcvtime = ticks; - sbappend(&so->so_rcv, m); + sbappendstream(&so->so_rcv, m); if (__predict_true((so->so_state & SS_NOFDREF) == 0)) sorwakeup(so); return (1); ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#15 (text+ko) ==== @@ -41,12 +41,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -525,7 +527,42 @@ return pru_sosend(so, addr, uio, top, control, flags, td); } +/* + * Following replacement or removal of the first mbuf on the first mbuf chain + * of a socket buffer, push necessary state changes back into the socket + * buffer so that other consumers see the values consistently. 'nextrecord' + * is the callers locally stored value of the original value of + * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. + * NOTE: 'nextrecord' may be NULL. + */ +#if 1 +static __inline void +sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) +{ + + SOCKBUF_LOCK_ASSERT(sb); + /* + * First, update for the new value of nextrecord. If necessary, make + * it the first record. + */ + if (sb->sb_mb != NULL) + sb->sb_mb->m_nextpkt = nextrecord; + else + sb->sb_mb = nextrecord; + /* + * Now update any dependent socket buffer fields to reflect the new + * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the + * addition of a second clause that takes care of the case where + * sb_mb has been updated, but remains the last record. + */ + if (sb->sb_mb == NULL) { + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + } else if (sb->sb_mb->m_nextpkt == NULL) + sb->sb_lastrecord = sb->sb_mb; +} +#endif #define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO) @@ -536,14 +573,13 @@ struct toepcb *toep = tp->t_toe; struct mbuf *m; uint32_t offset; - int err, flags, avail, len, buffers_freed, copied, copied_unacked; + int err, flags, avail, len, copied, copied_unacked; int target; /* Read at least this many bytes */ int user_ddp_ok, user_ddp_pending = 0; struct ddp_state *p; struct inpcb *inp = sotoinpcb(so); - - copied = copied_unacked = buffers_freed = 0; + avail = offset = copied = copied_unacked = 0; flags = flagsp ? (*flagsp &~ MSG_EOR) : 0; err = sblock(&so->so_rcv, SBLOCKWAIT(flags)); @@ -583,6 +619,8 @@ so->so_error = 0; goto done; } + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + goto done; if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) goto done; if (tp->t_state == TCPS_CLOSED) { @@ -625,15 +663,23 @@ } else if (copied >= target) goto done; else { - SOCKBUF_UNLOCK(&so->so_rcv); - INP_LOCK(inp); - t3_cleanup_rbuf(tp, copied_unacked); - INP_UNLOCK(inp); - SOCKBUF_LOCK(&so->so_rcv); - copied_unacked = 0; + int i = 0; + if (copied_unacked) { + SOCKBUF_UNLOCK(&so->so_rcv); + INP_LOCK(inp); + t3_cleanup_rbuf(tp, copied_unacked); + INP_UNLOCK(inp); + copied_unacked = 0; + if (mp_ncpus > 1) + while (i++ < 200 && so->so_rcv.sb_mb == NULL) + cpu_spinwait(); + SOCKBUF_LOCK(&so->so_rcv); + } + if (so->so_rcv.sb_mb) goto restart; - printf("sbwaiting 2 copied=%d target=%d\n", copied, target); + printf("sbwaiting 2 copied=%d target=%d avail=%d so=%p mb=%p cc=%d\n", copied, target, avail, so, + so->so_rcv.sb_mb, so->so_rcv.sb_cc); if ((err = sbwait(&so->so_rcv)) != 0) goto done; } @@ -642,24 +688,27 @@ if (m->m_pkthdr.len == 0) { if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0) panic("empty mbuf and NOCOPY not set\n"); + printf("dropping empty mbuf\n"); user_ddp_pending = 0; - sbfree(&so->so_rcv, m); - m = so->so_rcv.sb_mb = m_free(m); + sbdroprecord_locked(&so->so_rcv); goto done; } + offset = toep->tp_copied_seq + copied_unacked - m->m_seq; DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d is_ddp(m)=%d\n", m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, !!is_ddp(m)); + if (offset >= m->m_pkthdr.len) panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x " "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq, m->m_pkthdr.len, m->m_ddp_flags); + avail = m->m_pkthdr.len - offset; if (len < avail) { - if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY)) + if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY)) panic("bad state in t3_soreceive\n"); avail = len; - } + } #ifdef URGENT_DATA_SUPPORTED /* * Check if the data we are preparing to copy contains urgent @@ -720,16 +769,15 @@ goto done_unlocked; } SOCKBUF_LOCK(&so->so_rcv); - if (!(resid > uio->uio_resid)) - printf("copied zero bytes :-/ resid=%d uio_resid=%d copied=%d copied_unacked=%d\n", - resid, uio->uio_resid, copied, copied_unacked); - } - - sbdrop_locked(&so->so_rcv, avail); - buffers_freed++; + if (avail != (resid - uio->uio_resid)) + printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n", + avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m)); + } + copied += avail; copied_unacked += avail; len -= avail; + #ifdef URGENT_DATA_SUPPORTED skip_copy: if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq)) @@ -741,7 +789,8 @@ */ if (avail + offset >= m->m_pkthdr.len) { unsigned int fl = m->m_ddp_flags; - int exitnow, got_psh = 0, nomoredata = 0; + int exitnow, count, got_psh = 0, nomoredata = 0; + struct mbuf *nextrecord; if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) { if (is_ddp_psh(m) && user_ddp_pending) @@ -757,25 +806,47 @@ p->ubuf_ddp_ready = 1; } } + + nextrecord = m->m_nextpkt; + count = m->m_pkthdr.len; + while (count) { + count -= m->m_len; + sbfree(&so->so_rcv, m); + so->so_rcv.sb_mb = m_free(m); + m = so->so_rcv.sb_mb; + } + sockbuf_pushsync(&so->so_rcv, nextrecord); + exitnow = got_psh || nomoredata; - if ((so->so_rcv.sb_mb == NULL) && exitnow) + if ((so->so_rcv.sb_mb == NULL) && exitnow) { + printf("exiting\n"); goto done; - } - if (len > 0) + } + if (copied_unacked > (so->so_rcv.sb_hiwat >> 2)) { + SOCKBUF_UNLOCK(&so->so_rcv); + INP_LOCK(inp); + t3_cleanup_rbuf(tp, copied_unacked); + INP_UNLOCK(inp); + copied_unacked = 0; + SOCKBUF_LOCK(&so->so_rcv); + } + } + if (len > 0) goto restart; + + done: -done: /* * If we can still receive decide what to do in preparation for the * next receive. Note that RCV_SHUTDOWN is set if the connection * transitioned to CLOSE but not if it was in that state to begin with. */ if (__predict_true((so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) { - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_LOCK(&so->so_rcv); if (user_ddp_pending) { + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_LOCK(&so->so_rcv); user_ddp_ok = 0; t3_cancel_ubuf(toep); if (so->so_rcv.sb_mb) { @@ -810,12 +881,11 @@ #endif SOCKBUF_UNLOCK(&so->so_rcv); done_unlocked: - if (copied) { + if (copied_unacked) { INP_LOCK(inp); t3_cleanup_rbuf(tp, copied_unacked); INP_UNLOCK(inp); } - sbunlock(&so->so_rcv); return (err); @@ -844,27 +914,22 @@ * - iovcnt is 1 * */ + if ((tp->t_flags & TF_TOE) && ((flags & (MSG_WAITALL|MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0) -#ifdef notyet - && ((so->so_state & SS_NBIO) == 0) -#endif && (uio->uio_iovcnt == 1) && ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) && (mp0 == NULL)) { tdev = TOE_DEV(so); zcopy_thres = TOM_TUNABLE(tdev, ddp_thres); zcopy_enabled = TOM_TUNABLE(tdev, ddp); - if ((uio->uio_resid > zcopy_thres) && (uio->uio_iovcnt == 1) -#if 0 - && ((so->so_state & SS_NBIO) == 0) -#endif && zcopy_enabled) { rv = t3_soreceive(so, flagsp, uio); if (rv != EAGAIN) return (rv); - } - } + } + } else if (tp->t_flags & TF_TOE) + printf("skipping t3_soreceive\n"); return pru_soreceive(so, psa, uio, mp0, controlp, flagsp); }