From owner-p4-projects@FreeBSD.ORG Thu Jan 31 06:21:36 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id C327616A420; Thu, 31 Jan 2008 06:21:35 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6EF6516A417 for ; Thu, 31 Jan 2008 06:21:35 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 5763813C4EA for ; Thu, 31 Jan 2008 06:21:35 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id m0V6LZB4074744 for ; Thu, 31 Jan 2008 06:21:35 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id m0V6LZWH074741 for perforce@freebsd.org; Thu, 31 Jan 2008 06:21:35 GMT (envelope-from kmacy@freebsd.org) Date: Thu, 31 Jan 2008 06:21:35 GMT Message-Id: <200801310621.m0V6LZWH074741@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 134527 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 31 Jan 2008 06:21:36 -0000 http://perforce.freebsd.org/chv.cgi?CH=134527 Change 134527 by kmacy@kmacy:storage:toehead on 2008/01/31 06:21:26 fix ppod setup, rcv_wnd setting, locking on calls handle_excess_rx, accounting for copied_seq, and socket ddp initialization Affected files ... .. //depot/projects/toehead/sys/dev/cxgb/cxgb_offload.c#3 edit .. //depot/projects/toehead/sys/dev/cxgb/cxgb_sge.c#6 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#11 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#12 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#4 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#4 edit .. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_tom.c#3 edit Differences ... ==== //depot/projects/toehead/sys/dev/cxgb/cxgb_offload.c#3 (text+ko) ==== @@ -380,6 +380,8 @@ struct iff_mac *iffmacp; struct ddp_params *ddpp; struct adap_ports *ports; + struct ofld_page_info *rx_page_info; + struct tp_params *tp = &adapter->params.tp; int port; switch (req) { @@ -444,6 +446,11 @@ case FAILOVER_CLEAR: t3_failover_clear(adapter); break; + case GET_RX_PAGE_INFO: + rx_page_info = data; + rx_page_info->page_size = tp->rx_pg_size; + rx_page_info->num = tp->rx_num_pgs; + break; case ULP_ISCSI_GET_PARAMS: case ULP_ISCSI_SET_PARAMS: if (!offload_running(adapter)) ==== //depot/projects/toehead/sys/dev/cxgb/cxgb_sge.c#6 (text+ko) ==== @@ -73,7 +73,11 @@ #endif int txq_fills = 0; -static int recycle_enable = 1; +/* + * XXX don't re-enable this until TOE stops assuming + * we have an m_ext + */ +static int recycle_enable = 0; extern int cxgb_txq_buf_ring_size; int cxgb_cached_allocations; int cxgb_cached; ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#11 (text+ko) ==== @@ -1052,10 +1052,9 @@ /* * Determine the receive window size for a socket. */ -static unsigned int -select_rcv_wnd(struct socket *so) +static unsigned long +select_rcv_wnd(struct toedev *dev, struct socket *so) { - struct toedev *dev = TOE_DEV(so); struct tom_data *d = TOM_DATA(dev); unsigned int wnd; unsigned int max_rcv_wnd; @@ -1063,7 +1062,9 @@ if (tcp_do_autorcvbuf) wnd = tcp_autorcvbuf_max; else - wnd = sbspace(&so->so_rcv); + wnd = so->so_rcv.sb_hiwat; + + /* XXX * For receive coalescing to work effectively we need a receive window @@ -1076,8 +1077,14 @@ max_rcv_wnd = (dev->tod_ttid == TOE_ID_CHELSIO_T3B ? (uint32_t)d->rx_page_size * 23 : MAX_RCV_WND); - - return min(wnd, max_rcv_wnd); + + if (wnd > max_rcv_wnd) + wnd = max_rcv_wnd; + + printf("tcp_autorcvbuf_max=%d hiwat=%d min_win=%d max_win=%d returned=%d\n", + tcp_autorcvbuf_max, so->so_rcv.sb_hiwat, MIN_RCV_WND, MAX_RCV_WND, wnd); + + return ((unsigned long) wnd); } /* @@ -1116,7 +1123,7 @@ * XXX broken * */ - tp->rcv_wnd = select_rcv_wnd(so); + tp->rcv_wnd = select_rcv_wnd(dev, so); toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) && tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; toep->tp_qset_idx = 0; @@ -1775,7 +1782,6 @@ handle_ddp_data(struct toepcb *toep, struct mbuf *m) { struct tcpcb *tp = toep->tp_tp; - struct socket *so; struct ddp_state *q; struct ddp_buf_state *bsp; struct cpl_rx_data *hdr = cplhdr(m); @@ -1806,10 +1812,6 @@ bsp->cur_offset += m->m_pkthdr.len; if (!(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; - tp->t_rcvtime = ticks; - - so = toeptoso(toep); - sbappend(&so->so_rcv, m); /* * For now, don't re-enable DDP after a connection fell out of DDP * mode. @@ -1837,7 +1839,7 @@ return; } - if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) + if (toep->tp_ulp_mode == ULP_MODE_TCPDDP && toep->tp_ddp_state.kbuf[0]) handle_ddp_data(toep, m); m->m_seq = ntohl(hdr->seq); @@ -1939,7 +1941,11 @@ struct socket *so = toeptoso(toep); if (__predict_false(so_no_receive(so))) { + struct inpcb *inp = sotoinpcb(so); + + INP_LOCK(inp); handle_excess_rx(toep, m); + INP_UNLOCK(inp); return; } @@ -2056,7 +2062,11 @@ unsigned int ddp_report, buf_idx, when; if (__predict_false(so_no_receive(so))) { + struct inpcb *inp = sotoinpcb(so); + + INP_LOCK(inp); handle_excess_rx(toep, m); + INP_UNLOCK(inp); return; } TRACE_ENTER; @@ -3044,14 +3054,15 @@ newtoep->tp_flags = TP_SYN_RCVD; newtoep->tp_tid = tid; newtoep->tp_toedev = tdev; + tp->rcv_wnd = select_rcv_wnd(tdev, so); - printf("inserting tid=%d\n", tid); + printf("inserting tid=%d rcv_wnd=%ld\n", tid, tp->rcv_wnd); cxgb_insert_tid(cdev, d->client, newtoep, tid); SOCK_LOCK(so); LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); SOCK_UNLOCK(so); - +#ifdef notyet if (lctx->ulp_mode) { ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); @@ -3060,7 +3071,22 @@ else newtoep->tp_ulp_mode = lctx->ulp_mode; } +#else + newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */ + tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; + printf("ddp=%d rcv_wnd=%ld min_win=%d\n", + TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN); + + if (newtoep->tp_ulp_mode) { + ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); + + if (ddp_mbuf == NULL) + newtoep->tp_ulp_mode = 0; + } + +#endif + set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); DPRINTF("adding request to syn cache\n"); @@ -3342,7 +3368,7 @@ toep->tp_flags = 0; tp->t_toe = toep; reset_wr_list(toep); - tp->rcv_wnd = select_rcv_wnd(so); + tp->rcv_wnd = select_rcv_wnd(tdev, so); DPRINTF("rcv_wnd=%ld\n", tp->rcv_wnd); install_offload_ops(so); @@ -3725,7 +3751,6 @@ const struct tom_data *td = TOM_DATA(TOE_DEV(so)); unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; - TRACE_ENTER; for (i = 0; i < nppods; ++i) { m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); @@ -3753,8 +3778,6 @@ send_or_defer(toep, m, 0); ppod_addr += PPOD_SIZE; } - - TRACE_EXIT; return (0); } @@ -3975,7 +3998,6 @@ struct work_request_hdr *wr; struct cpl_set_tcb_field *req; - TRACE_ENTER; wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + (len1 ? sizeof(*req) : 0) + (modulate ? sizeof(struct cpl_rx_data_ack) : 0); @@ -4021,7 +4043,6 @@ #endif cxgb_ofld_send(TOEP_T3C_DEV(toep), m); - TRACE_EXIT; } void ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#12 (text+ko) ==== @@ -255,6 +255,11 @@ static int so_should_ddp(const struct toepcb *toep, int last_recv_len) { + + printf("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n", + toep->tp_ulp_mode, last_recv_len, TOM_TUNABLE(toep->tp_toedev, ddp_thres), + toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN)); + return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) && last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) && toep->tp_tp->rcv_wnd > @@ -276,28 +281,37 @@ static int m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio) { - int curlen, err = 0; + int curlen, startlen, resid_init, err = 0; caddr_t buf; - + + DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n", + m, offset, len); + + startlen = len; + resid_init = uio->uio_resid; while (m && len) { buf = mtod(m, caddr_t); curlen = m->m_len; - if (offset < curlen) { + if (offset && (offset < curlen)) { curlen -= offset; buf += offset; offset = 0; - } else { + } else if (offset) { offset -= curlen; m = m->m_next; continue; } + err = uiomove(buf, min(len, curlen), uio); + if (err) { + printf("uiomove_frombuf returned %d\n", err); + return (err); + } - err = uiomove_frombuf(buf, min(len, curlen), uio); - if (err) - return (err); - len -= min(len, m->m_len); + len -= min(len, curlen); m = m->m_next; } + DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n", + startlen - len, resid_init, uio->uio_resid); return (err); } @@ -310,16 +324,24 @@ copy_data(const struct mbuf *m, int offset, int len, struct uio *uio) { struct iovec *to = uio->uio_iov; + int err; + - if (__predict_true(!is_ddp(m))) /* RX_DATA */ + if (__predict_true(!is_ddp(m))) { /* RX_DATA */ return m_uiomove(m, offset, len, uio); - if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */ + } if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */ + TRACE_ENTER; to->iov_len -= len; to->iov_base = ((caddr_t)to->iov_base) + len; uio->uio_iov = to; + uio->uio_resid -= len; + TRACE_EXIT; return (0); } - return t3_ddp_copy(m, offset, uio, len); /* kernel DDP */ + TRACE_ENTER; + err = t3_ddp_copy(m, offset, uio, len); /* kernel DDP */ + TRACE_EXIT; + return (err); } static void @@ -508,6 +530,9 @@ } + +#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO) + static int t3_soreceive(struct socket *so, int *flagsp, struct uio *uio) { @@ -515,18 +540,21 @@ struct toepcb *toep = tp->t_toe; struct mbuf *m; uint32_t offset; - int err, flags, avail, len, buffers_freed = 0, copied = 0; + int err, flags, avail, len, buffers_freed, copied, copied_unacked; int target; /* Read at least this many bytes */ int user_ddp_ok, user_ddp_pending = 0; struct ddp_state *p; struct inpcb *inp = sotoinpcb(so); - + + + copied = copied_unacked = buffers_freed = 0; flags = flagsp ? (*flagsp &~ MSG_EOR) : 0; err = sblock(&so->so_rcv, SBLOCKWAIT(flags)); + if (err) return (err); - TRACE_ENTER; + SOCKBUF_LOCK(&so->so_rcv); restart: len = uio->uio_resid; @@ -572,13 +600,14 @@ t3_cleanup_rbuf(tp); INP_UNLOCK(inp); SOCKBUF_LOCK(&so->so_rcv); + copied_unacked = 0; goto restart; } if (p->ubuf && user_ddp_ok && !user_ddp_pending && uio->uio_iov->iov_len > p->kbuf[0]->dgl_length && p->ubuf_ddp_ready) { user_ddp_pending = - !t3_overlay_ubuf(so, uio, (so->so_state & SS_NBIO), flags); + !t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags); if (user_ddp_pending) { p->kbuf_posted++; user_ddp_ok = 0; @@ -588,6 +617,7 @@ /* One shot at DDP if we already have enough data */ if (copied >= target) user_ddp_ok = 0; + printf("sbwaiting 1\n"); if ((err = sbwait(&so->so_rcv)) != 0) goto done; //for timers to work await_ddp_completion(sk, flags, &timeo); @@ -598,7 +628,10 @@ INP_LOCK(inp); t3_cleanup_rbuf(tp); INP_UNLOCK(inp); - SOCKBUF_LOCK(&so->so_rcv); + SOCKBUF_LOCK(&so->so_rcv); + copied_unacked = 0; + printf("sbwaiting 2\n"); + if ((err = sbwait(&so->so_rcv)) != 0) goto done; } @@ -612,10 +645,12 @@ m = so->so_rcv.sb_mb = m_free(m); goto done; } - offset = toep->tp_copied_seq - m->m_seq + 1 /* OFF by one somewhere :-{ */; + offset = toep->tp_copied_seq + copied_unacked - m->m_seq + 1 /* OFF by one somewhere :-{ */; + DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d\n", + m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset); if (offset >= m->m_pkthdr.len) - panic("t3_soreceive: BUG: OFFSET > LEN offset %d copied_seq 0x%x seq 0x%x " - "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq, m->m_seq, + panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x " + "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq, m->m_pkthdr.len, m->m_ddp_flags); avail = m->m_pkthdr.len - offset; if (len < avail) { @@ -630,7 +665,7 @@ * first and we are not delivering urgent data inline. */ if (__predict_false(toep->tp_urg_data)) { - uint32_t urg_offset = tp->rcv_up - tp->copied_seq; + uint32_t urg_offset = tp->rcv_up - tp->copied_seq + copied_unacked; if (urg_offset < avail) { if (urg_offset) { @@ -672,19 +707,28 @@ * If MSG_TRUNC is specified the data is discarded. * XXX need to check pr_atomic */ - if (__predict_true(!(flags & MSG_TRUNC))) + if (__predict_true(!(flags & MSG_TRUNC))) { + int resid = uio->uio_resid; + SOCKBUF_UNLOCK(&so->so_rcv); if ((err = copy_data(m, offset, avail, uio))) { if (err) err = EFAULT; - goto done; + goto done_unlocked; } - - toep->tp_copied_seq += avail; + SOCKBUF_LOCK(&so->so_rcv); + if (!(resid > uio->uio_resid)) + printf("copied zero bytes :-/ resid=%d uio_resid=%d copied=%d copied_unacked=%d\n", + resid, uio->uio_resid, copied, copied_unacked); + } + + sbdrop_locked(&so->so_rcv, avail); + buffers_freed++; copied += avail; + copied_unacked += avail; len -= avail; #ifdef URGENT_DATA_SUPPORTED skip_copy: - if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) + if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq)) tp->urg_data = 0; #endif /* @@ -706,9 +750,6 @@ p->ubuf_ddp_ready = 1; } } - sbfree(&so->so_rcv, m); - m = so->so_rcv.sb_mb = m_free(m); /* XXX need to clean mbuf first */ - buffers_freed++; if ((so->so_rcv.sb_mb == NULL) && got_psh) goto done; @@ -733,7 +774,7 @@ } user_ddp_pending = 0; } - if (p->kbuf_posted == 0) { + if ((p->kbuf[0] != NULL) && (p->kbuf_posted == 0)) { #ifdef T3_TRACE T3_TRACE0(TIDTB(so), "chelsio_recvmsg: about to exit, repost kbuf"); @@ -741,15 +782,19 @@ t3_post_kbuf(so, 1); p->kbuf_posted++; - } else if (so_should_ddp(toep, copied)) { + } else if (so_should_ddp(toep, copied) +#ifdef notyet + && !IS_NONBLOCKING(so) +#endif + ) { printf("entering ddp\n"); t3_enter_ddp(so, TOM_TUNABLE(TOE_DEV(so), ddp_copy_limit), 0); p->kbuf_posted = 1; - } + } else + printf("user_ddp_pending=%d kbuf[0]=%p kbuf_posted=%d so_should_ddp=%d\n", + user_ddp_pending, p->kbuf[0], p->kbuf_posted, so_should_ddp(toep, copied)); } - if (buffers_freed) - t3_cleanup_rbuf(tp); #ifdef T3_TRACE T3_TRACE5(TIDTB(so), "chelsio_recvmsg <-: copied %d len %d buffers_freed %d " @@ -758,9 +803,15 @@ user_ddp_pending); #endif SOCKBUF_UNLOCK(&so->so_rcv); +done_unlocked: + if (copied) { + INP_LOCK(inp); + t3_cleanup_rbuf(tp); + INP_UNLOCK(inp); + } + sbunlock(&so->so_rcv); - TRACE_EXIT; return (err); } @@ -808,10 +859,6 @@ return (rv); } } - if (uio->uio_resid > PAGE_SIZE) - printf("flags=0x%x nonblocking=0x%x iovcnt=%d mp0=%p uio_resid=%d \n", - flags, !!(so->so_state && SS_NBIO), uio->uio_iovcnt, mp0, uio->uio_resid); - return pru_soreceive(so, psa, uio, mp0, controlp, flagsp); } ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#4 (text+ko) ==== @@ -568,9 +568,11 @@ if (p->kbuf[idx] == NULL) goto err; err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]); - if (err) + if (err) { + printf("t3_alloc_ppods failed err=%d\n", err); goto err; - + } + p->kbuf_nppods[idx] = nppods; p->kbuf[idx]->dgl_length = kbuf_size; p->kbuf[idx]->dgl_offset = 0; @@ -582,6 +584,7 @@ VM_ALLOC_ZERO); if (p->kbuf[idx]->dgl_pages[i] == NULL) { p->kbuf[idx]->dgl_nelem = i; + printf("failed to allocate kbuf pages\n"); goto err; } } @@ -623,13 +626,15 @@ { int page_off; struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl; - + + TRACE_ENTER; if (!gl->dgl_pages) panic("pages not set\n"); offset += gl->dgl_offset + m->m_cur_offset; page_off = offset & ~PAGE_MASK; + TRACE_EXIT; return uiomove_fromphys(gl->dgl_pages, page_off, len, uio); } @@ -641,8 +646,10 @@ { unsigned int i, j; - if (__predict_false(!td->ppod_map)) + if (__predict_false(!td->ppod_map)) { + printf("ppod_map not set\n"); return (EINVAL); + } mtx_lock(&td->ppod_map_lock); for (i = 0; i < td->nppods; ) { ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#4 (text+ko) ==== @@ -40,8 +40,8 @@ #define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket) #define sototoep(so) (sototcpcb((so))->t_toe) -#define TRACE_ENTER printf("%s:%s entered", __FUNCTION__, __FILE__) -#define TRACE_EXIT printf("%s:%s:%d exited", __FUNCTION__, __FILE__, __LINE__) +#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__) +#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__) ==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_tom.c#3 (text+ko) ==== @@ -376,14 +376,12 @@ t->ddp_ulimit = ddp.ulimit; t->pdev = ddp.pdev; t->rx_page_size = rx_page_info.page_size; -#ifdef notyet /* OK if this fails, we just can't do DDP */ t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE; - t->ppod_map = t3_alloc_mem(t->nppods); -#endif + t->ppod_map = malloc(t->nppods, M_DEVBUF, M_WAITOK); + mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF); #if 0 - spin_lock_init(&t->ppod_map_lock); tom_proc_init(dev); #ifdef CONFIG_SYSCTL t->sysctl = t3_sysctl_register(dev, &t->conf);