Date: Fri, 1 Feb 2008 06:46:00 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 134584 for review
Message-ID: <200802010646.m116k0u8073764@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=134584 Change 134584 by kmacy@kmacy:storage:toehead on 2008/02/01 06:45:34 - various fixes for ddp socket buffer accounting - verbose logging in ddp path Affected files ... .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#12 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#13 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#5 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#5 edit Differences ... ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#12 (text+ko) ==== @@ -578,7 +578,7 @@ * to the HW for the amount of data processed. */ void -t3_cleanup_rbuf(struct tcpcb *tp) +t3_cleanup_rbuf(struct tcpcb *tp, int copied) { struct toepcb *toep = tp->t_toe; struct socket *so; @@ -593,10 +593,14 @@ so = tp->t_inpcb->inp_socket; SOCKBUF_LOCK(&so->so_rcv); - read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; - toep->tp_copied_seq += read; - toep->tp_enqueued_bytes -= read; + if (copied) + toep->tp_copied_seq += copied; + else { + read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; + toep->tp_copied_seq += read; + } credits = toep->tp_copied_seq - toep->tp_rcv_wup; + toep->tp_enqueued_bytes = so->so_rcv.sb_cc; SOCKBUF_UNLOCK(&so->so_rcv); if (credits > so->so_rcv.sb_mbmax) @@ -686,7 +690,7 @@ cxgb_toe_rcvd(struct tcpcb *tp) { INP_LOCK_ASSERT(tp->t_inpcb); - t3_cleanup_rbuf(tp); + t3_cleanup_rbuf(tp, 0); return (0); } @@ -1742,6 +1746,7 @@ tp = toep->tp_tp; m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_seq = tp->rcv_nxt; tp->rcv_nxt += m->m_pkthdr.len; tp->t_rcvtime = ticks; @@ -1793,7 +1798,10 @@ TRACE_ENTER; q = &toep->tp_ddp_state; bsp = &q->buf_state[q->cur_buf]; - m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; + m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; + + printf("rcv_nxt=0x%x tp->rcv_next=0x%x len=%d\n", + rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len); #ifdef T3_TRACE if ((int)m->m_pkthdr.len < 0) { @@ -1802,11 +1810,14 @@ #endif 
m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_cur_offset = bsp->cur_offset; m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; if (bsp->flags & DDP_BF_NOCOPY) bsp->flags &= ~DDP_BF_NOCOPY; + printf("ddp flags=0x%x\n", m->m_ddp_flags); + m->m_seq = tp->rcv_nxt; tp->rcv_nxt = rcv_nxt; bsp->cur_offset += m->m_pkthdr.len; @@ -1836,14 +1847,14 @@ if (__predict_false(so_no_receive(so))) { handle_excess_rx(toep, m); INP_UNLOCK(tp->t_inpcb); + TRACE_EXIT; return; } - if (toep->tp_ulp_mode == ULP_MODE_TCPDDP && toep->tp_ddp_state.kbuf[0]) + if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) handle_ddp_data(toep, m); - + m->m_seq = ntohl(hdr->seq); - m->m_ddp_flags = 0; m->m_ulp_mode = 0; /* for iSCSI */ #if VALIDATE_SEQ @@ -1974,7 +1985,7 @@ /* * Overload to store old RCV_NXT */ - m->m_pkthdr.csum_data = tp->rcv_nxt; + m->m_seq = tp->rcv_nxt; tp->rcv_nxt = rcv_nxt; /* @@ -1982,7 +1993,7 @@ * m->m_len here, we need to be very careful that nothing from now on * interprets ->len of this packet the usual way. */ - m->m_len = tp->rcv_nxt - m->m_pkthdr.csum_data; + m->m_len = m->m_pkthdr.len = tp->rcv_nxt - m->m_seq; /* * Figure out where the new data was placed in the buffer and store it @@ -1992,8 +2003,9 @@ end_offset = G_DDP_OFFSET(ddp_report) + ddp_len; m->m_cur_offset = end_offset - m->m_pkthdr.len; m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; bsp->cur_offset = end_offset; - + toep->tp_enqueued_bytes += m->m_pkthdr.len; /* * Bit 0 of flags stores whether the DDP buffer is completed. * Note that other parts of the code depend on this being in bit 0. 
@@ -2001,25 +2013,30 @@ if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) { panic("spurious ddp completion"); } else { - m->m_pkthdr.csum_flags = !!(ddp_report & F_DDP_BUF_COMPLETE); - if (m->m_pkthdr.csum_flags && !(bsp->flags & DDP_BF_NOFLIP)) + m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE); + if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; /* flip buffers */ } if (bsp->flags & DDP_BF_NOCOPY) { - m->m_pkthdr.csum_flags |= (bsp->flags & DDP_BF_NOCOPY); + m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY); bsp->flags &= ~DDP_BF_NOCOPY; } if (ddp_report & F_DDP_PSH) - m->m_pkthdr.csum_flags |= DDP_BF_PSH; + m->m_ddp_flags |= DDP_BF_PSH; tp->t_rcvtime = ticks; + + printf("ddp set and ddp_flags=0x%x len=%d m_seq=0x%x rcv_nxt=0x%x\n", m->m_ddp_flags, m->m_len, m->m_seq, rcv_nxt); + + SOCKBUF_LOCK(&so->so_rcv); sbappendstream_locked(&so->so_rcv, m); if ((so->so_state & SS_NOFDREF) == 0) sorwakeup_locked(so); - + else + SOCKBUF_UNLOCK(&so->so_rcv); TRACE_EXIT; } @@ -2077,7 +2094,7 @@ bsp = &q->buf_state[buf_idx]; when = bsp->cur_offset; - m->m_len = G_DDP_OFFSET(ddp_report) - when; + m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when; #ifdef T3_TRACE T3_TRACE5(TIDTB(sk), @@ -2100,6 +2117,7 @@ G_DDP_OFFSET(ddp_report)); #endif m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_pkthdr.csum_flags = (bsp->flags & DDP_BF_NOCOPY) | 1; if (bsp->flags & DDP_BF_NOCOPY) bsp->flags &= ~DDP_BF_NOCOPY; @@ -2193,6 +2211,7 @@ bsp = &q->buf_state[q->cur_buf]; m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_cur_offset = bsp->cur_offset; m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; @@ -2925,7 +2944,8 @@ th.th_seq = req->rcv_isn; th.th_flags = TH_SYN; - toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn; + toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; + inc.inc_isipv6 = 
0; inc.inc_len = 0; @@ -3075,9 +3095,6 @@ newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; - printf("ddp=%d rcv_wnd=%ld min_win=%d\n", - TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN); - if (newtoep->tp_ulp_mode) { ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); @@ -3085,6 +3102,9 @@ newtoep->tp_ulp_mode = 0; } + printf("ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d\n", + TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); + #endif set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); @@ -3369,7 +3389,8 @@ tp->t_toe = toep; reset_wr_list(toep); tp->rcv_wnd = select_rcv_wnd(tdev, so); - DPRINTF("rcv_wnd=%ld\n", tp->rcv_wnd); + tp->rcv_nxt = toep->tp_copied_seq; + printf("rcv_wnd=%ld rcv_nxt=0x%x\n", tp->rcv_wnd, tp->rcv_nxt); install_offload_ops(so); toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs); ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#13 (text+ko) ==== @@ -303,7 +303,7 @@ } err = uiomove(buf, min(len, curlen), uio); if (err) { - printf("uiomove_frombuf returned %d\n", err); + printf("uiomove returned %d\n", err); return (err); } @@ -559,7 +559,7 @@ restart: len = uio->uio_resid; m = so->so_rcv.sb_mb; - target = (flags & MSG_WAITALL) ? min(len, so->so_rcv.sb_hiwat) : so->so_rcv.sb_lowat; + target = (flags & MSG_WAITALL) ? 
len : so->so_rcv.sb_lowat; p = &toep->tp_ddp_state; user_ddp_ok = p->ubuf_ddp_ready; p->cancel_ubuf = 0; @@ -597,22 +597,27 @@ if (so->so_rcv.sb_mb && !user_ddp_pending) { SOCKBUF_UNLOCK(&so->so_rcv); INP_LOCK(inp); - t3_cleanup_rbuf(tp); + t3_cleanup_rbuf(tp, copied_unacked); INP_UNLOCK(inp); SOCKBUF_LOCK(&so->so_rcv); copied_unacked = 0; goto restart; } - if (p->ubuf && user_ddp_ok && !user_ddp_pending && + if (p->kbuf[0] && user_ddp_ok && !user_ddp_pending && uio->uio_iov->iov_len > p->kbuf[0]->dgl_length && p->ubuf_ddp_ready) { user_ddp_pending = - !t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags); + !t3_overlay_ubuf(so, uio, 1, 1); if (user_ddp_pending) { p->kbuf_posted++; user_ddp_ok = 0; } + printf("user_ddp_pending=%d\n", user_ddp_pending); } + if (p->kbuf[0] && (p->kbuf_posted == 0)) { + t3_post_kbuf(so, 1); + p->kbuf_posted++; + } if (user_ddp_pending) { /* One shot at DDP if we already have enough data */ if (copied >= target) @@ -626,16 +631,15 @@ else { SOCKBUF_UNLOCK(&so->so_rcv); INP_LOCK(inp); - t3_cleanup_rbuf(tp); + t3_cleanup_rbuf(tp, copied_unacked); INP_UNLOCK(inp); SOCKBUF_LOCK(&so->so_rcv); copied_unacked = 0; printf("sbwaiting 2\n"); - if ((err = sbwait(&so->so_rcv)) != 0) goto done; } - goto restart; + goto restart; got_mbuf: if (m->m_pkthdr.len == 0) { if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0) @@ -645,9 +649,9 @@ m = so->so_rcv.sb_mb = m_free(m); goto done; } - offset = toep->tp_copied_seq + copied_unacked - m->m_seq + 1 /* OFF by one somewhere :-{ */; - DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d\n", - m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset); + offset = toep->tp_copied_seq + copied_unacked - m->m_seq; + printf("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d is_ddp(m)=%d\n", + m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, is_ddp(m)); if (offset >= m->m_pkthdr.len) panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x " "pktlen 
%d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq, @@ -690,19 +694,20 @@ } if (user_ddp_ok && !user_ddp_pending && - /* - * XXX - */ uio->uio_iov->iov_len > p->kbuf[0]->dgl_length && p->ubuf_ddp_ready) { user_ddp_pending = - !t3_overlay_ubuf(so, uio, (so->so_state & SS_NBIO), flags); + !t3_overlay_ubuf(so, uio, 1, 1); if (user_ddp_pending) { p->kbuf_posted++; user_ddp_ok = 0; } - } - + printf("user_ddp_pending=%d\n", user_ddp_pending); + } else + printf("user_ddp_ok=%d user_ddp_pending=%d iov_len=%ld dgl_length=%d ubuf_ddp_ready=%d ulp_mode=%d is_ddp(m)=%d flags=0x%x ubuf=%p kbuf_posted=%d\n", + user_ddp_ok, user_ddp_pending, uio->uio_iov->iov_len, p->kbuf[0] ? p->kbuf[0]->dgl_length : 0, + p->ubuf_ddp_ready, toep->tp_ulp_mode, !!is_ddp(m), m->m_ddp_flags, p->ubuf, p->kbuf_posted); + /* * If MSG_TRUNC is specified the data is discarded. * XXX need to check pr_atomic @@ -739,7 +744,7 @@ unsigned int fl = m->m_ddp_flags; int got_psh = 0; - if (p->ubuf != NULL && is_ddp(m) && (fl & 1)) { + if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) { if (is_ddp_psh(m) && user_ddp_pending) got_psh = 1; @@ -748,6 +753,7 @@ else { p->kbuf_posted--; p->ubuf_ddp_ready = 1; + printf("ubuf ddp ready\n"); } } @@ -756,6 +762,7 @@ } if (len > 0) goto restart; + done: /* @@ -780,6 +787,7 @@ "chelsio_recvmsg: about to exit, repost kbuf"); #endif + printf("posting kbuf\n"); t3_post_kbuf(so, 1); p->kbuf_posted++; } else if (so_should_ddp(toep, copied) @@ -791,9 +799,7 @@ t3_enter_ddp(so, TOM_TUNABLE(TOE_DEV(so), ddp_copy_limit), 0); p->kbuf_posted = 1; - } else - printf("user_ddp_pending=%d kbuf[0]=%p kbuf_posted=%d so_should_ddp=%d\n", - user_ddp_pending, p->kbuf[0], p->kbuf_posted, so_should_ddp(toep, copied)); + } } #ifdef T3_TRACE T3_TRACE5(TIDTB(so), @@ -806,7 +812,7 @@ done_unlocked: if (copied) { INP_LOCK(inp); - t3_cleanup_rbuf(tp); + t3_cleanup_rbuf(tp, copied_unacked); INP_UNLOCK(inp); } ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#5 
(text+ko) ==== @@ -260,7 +260,8 @@ { struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; - + TRACE_ENTER; + p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset; p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0; p->buf_state[bufidx].gl = p->kbuf[bufidx]; @@ -282,6 +283,7 @@ V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), modulate); + TRACE_EXIT; } /* @@ -423,13 +425,19 @@ struct ddp_state *p = &toep->tp_ddp_state; struct ddp_buf_state *dbs; - if (p->ubuf == NULL) + + TRACE_ENTER; + if (p->kbuf[0] == NULL) { + TRACE_EXIT; return (EINVAL); - + } + err = setup_uio_ppods(so, uio, 0, &len); - if (err) + if (err) { + TRACE_EXIT; return (err); - + } + ubuf_idx = p->kbuf_idx; p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP; /* Use existing offset */ @@ -467,6 +475,7 @@ " kbuf_idx %d", p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx); #endif + TRACE_EXIT; return (0); } @@ -528,10 +537,11 @@ struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; + TRACE_ENTER; t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6); t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length); - t3_repost_kbuf(so, p->cur_buf, modulate, 1); - + t3_repost_kbuf(so, p->cur_buf, modulate, (so->so_state & SS_NBIO)); + TRACE_EXIT; #ifdef T3_TRACE T3_TRACE1(TIDTB(so), "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf); @@ -545,7 +555,7 @@ int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall) { - int i, err = ENOMEM; + int i, nonblock, err = ENOMEM; static vm_pindex_t color; unsigned int nppods, kbuf_pages, idx = 0; struct toepcb *toep = sototcpcb(so)->t_toe; @@ -555,11 +565,12 @@ if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN) return (EINVAL); + nonblock = (so->so_state & SS_NBIO); + kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; nppods = pages2ppods(kbuf_pages); p->kbuf_noinval = !!waitall; - p->kbuf_tag[NUM_DDP_KBUF - 1] = -1; for (idx = 0; idx < 
NUM_DDP_KBUF; idx++) { p->kbuf[idx] = @@ -603,10 +614,11 @@ } t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6); t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length); - t3_repost_kbuf(so, 0, 0, 1); + t3_repost_kbuf(so, 0, 0, nonblock); t3_set_rcv_coalesce_enable(so, TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce)); + printf("ddp entered\n"); #ifdef T3_TRACE T3_TRACE4(TIDTB(so), "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d", @@ -624,20 +636,27 @@ int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len) { - int page_off; + int page_off, resid_init, err; struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl; TRACE_ENTER; + resid_init = uio->uio_resid; + if (!gl->dgl_pages) panic("pages not set\n"); offset += gl->dgl_offset + m->m_cur_offset; page_off = offset & ~PAGE_MASK; + err = uiomove_fromphys(gl->dgl_pages, page_off, len, uio); + printf("err=%d resid_init=%d uio_resid=%d offset=%d len=%d\n", + err, resid_init, uio->uio_resid, offset, len); + TRACE_EXIT; - return uiomove_fromphys(gl->dgl_pages, page_off, len, uio); + return (err); } + /* * Allocate n page pods. Returns -1 on failure or the page pod tag. */ ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#5 (text+ko) ==== @@ -61,7 +61,7 @@ void t3_init_wr_tab(unsigned int wr_len); uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail); void t3_send_rx_modulate(struct toepcb *toep); -void t3_cleanup_rbuf(struct tcpcb *tp); +void t3_cleanup_rbuf(struct tcpcb *tp, int copied); void t3_init_socket_ops(void); void t3_install_socket_ops(struct socket *so);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200802010646.m116k0u8073764>