From owner-p4-projects@FreeBSD.ORG Mon Nov 26 00:03:30 2007 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 4959516A46C; Mon, 26 Nov 2007 00:03:30 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 062F116A421 for ; Mon, 26 Nov 2007 00:03:30 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id E765E13C474 for ; Mon, 26 Nov 2007 00:03:29 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id lAQ03T8J053611 for ; Mon, 26 Nov 2007 00:03:29 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id lAQ03T6Q053608 for perforce@freebsd.org; Mon, 26 Nov 2007 00:03:29 GMT (envelope-from kmacy@freebsd.org) Date: Mon, 26 Nov 2007 00:03:29 GMT Message-Id: <200711260003.lAQ03T6Q053608@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 129530 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 26 Nov 2007 00:03:30 -0000 http://perforce.freebsd.org/chv.cgi?CH=129530 Change 129530 by kmacy@kmacy:storage:toestack on 2007/11/26 00:03:19 - add a bunch of missed wakeups - set socket as not being able to receive more data when it receives a fin so that the blocking read will return 0 - add inpcb locking for serializing toepcb access - add some debugging printfs for current edge cases Affected files ... .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#25 edit Differences ... ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#25 (text+ko) ==== @@ -368,18 +368,28 @@ struct mbuf *m; struct cpl_close_con_req *req; struct tom_data *d; - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; - unsigned int tid = toep->tp_tid; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp; + struct toepcb *toep; + unsigned int tid; + + + INP_LOCK(inp); + tp = sototcpcb(so); + toep = tp->t_toe; - d = TOM_DATA(toep->tp_toedev); - if (tp->t_state != TCPS_SYN_SENT) t3_push_frames(so, 1); - if (toep->tp_flags & TP_FIN_SENT) + if (toep->tp_flags & TP_FIN_SENT) { + INP_UNLOCK(inp); return; - + } + + tid = toep->tp_tid; + + d = TOM_DATA(toep->tp_toedev); + m = m_gethdr_nofail(sizeof(*req)); toep->tp_flags |= TP_FIN_SENT; @@ -389,7 +399,7 @@ req->wr.wr_lo = htonl(V_WR_TID(tid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); req->rsvd = htonl(toep->tp_write_seq); - + INP_UNLOCK(inp); /* * XXX - need to defer shutdown while there is still data in the queue * @@ -459,15 +469,15 @@ if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || (tp->t_state == TCPS_FIN_WAIT_2))) return; - so = tp->t_inpcb->inp_socket; + SOCKBUF_LOCK(&so->so_rcv); read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; toep->tp_copied_seq += read; toep->tp_enqueued_bytes -= read; credits = toep->tp_copied_seq - toep->tp_rcv_wup; DPRINTF("copied_seq=%u rcv_wup=%u credits=%u\n", toep->tp_copied_seq, toep->tp_rcv_wup, credits); - + SOCKBUF_UNLOCK(&so->so_rcv); /* * XXX this won't accurately reflect credit return - we need * to look at the difference between the amount that has been @@ -512,7 +522,7 @@ struct socket *so; printf("cxgb_toe_disconnect\n"); - + so = tp->t_inpcb->inp_socket; close_conn(so); return (0); @@ -544,6 +554,7 @@ static int cxgb_toe_rcvd(struct tcpcb *tp) { + INP_LOCK_ASSERT(tp->t_inpcb); t3_cleanup_rbuf(tp); return (0); @@ -564,7 +575,7 @@ struct cpl_set_tcb_field *req; struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; - + req = mtod(m, struct cpl_set_tcb_field *); m->m_pkthdr.len = m->m_len = sizeof(*req); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); @@ -588,7 +599,7 @@ if (toep == NULL) return; - + if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) return; @@ -728,6 +739,7 @@ if (!m) return (ENOMEM); + INP_LOCK_ASSERT(tp->t_inpcb); m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, so)); req = mtod(m, struct cpl_get_tcb *); m->m_pkthdr.len = m->m_len = sizeof(*req); @@ -855,10 +867,6 @@ cxgb_remove_tid(cdev, (void *)so, tid); toepcb_release(toep); } -#ifdef notyet - t3_set_ca_ops(so, &tcp_init_congestion_ops); -#endif - TOE_DEV(so) = NULL; #if 0 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state); #endif @@ -936,16 +944,11 @@ struct toepcb *toep; struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev); - toep = malloc(sizeof(struct toepcb), M_DEVBUF, M_NOWAIT); - + toep = toepcb_alloc(); if (toep == NULL) return (ENOMEM); printf("initializing offload socket\n"); - - toepcb_init(toep); - toepcb_hold(toep); - tp->t_toe = toep; toep->tp_toedev = dev; @@ -1309,7 +1312,7 @@ tp = sototcpcb(so); if ((err = t3_set_cong_control(so, name)) == 0) - tp->t_cong_control = strdup(name, M_DEVBUF); + tp->t_cong_control = strdup(name, M_CXGB); else return (err); } else { @@ -1370,6 +1373,8 @@ struct socket *so = toeptoso(toep); int len = be16toh(hdr->len); + INP_LOCK(tp->t_inpcb); + #ifdef notyet if (__predict_false(sk_no_receive(sk))) { handle_excess_rx(so, skb); @@ -1424,11 +1429,18 @@ "new_rx_data: seq 0x%x len %u", TCP_SKB_CB(skb)->seq, skb->len); #endif + SOCKBUF_LOCK(&so->so_rcv); + if (sb_notify(&so->so_rcv)) + printf("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len); + + + sbappendstream_locked(&so->so_rcv, m); + INP_UNLOCK(tp->t_inpcb); - sbappend(&so->so_rcv, m); - if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup(so); + sorwakeup_locked(so); + else + SOCKBUF_UNLOCK(&so->so_rcv); } /* @@ -1686,13 +1698,18 @@ struct toepcb *toep = tp->t_toe; int keep = 0, dead = (so->so_state & SS_NOFDREF); + printf("do_peer_fin state=%d dead=%d\n", tp->t_state, !!dead); + #ifdef T3_TRACE T3_TRACE0(TIDTB(sk),"do_peer_fin:"); #endif - if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) + if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { + printf("abort_pending set\n"); + goto out; - + } + #ifdef notyet if (ULP_MODE(tp) == ULP_MODE_TCPDDP) { keep = handle_peer_close_data(so, skb); @@ -1702,8 +1719,12 @@ sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(so, SOCK_DONE); #endif + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) + socantrcvmore(so); switch (tp->t_state) { case TCPS_SYN_RECEIVED: + tp->t_starttime = ticks; + /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; @@ -1737,6 +1758,11 @@ } if (!dead) { + printf("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags); + + sorwakeup(so); + sowwakeup(so); + wakeup(&so->so_timeo); #ifdef notyet sk->sk_state_change(sk); @@ -1778,6 +1804,8 @@ tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ + printf("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state, + !!(so->so_state & SS_NOFDREF)); if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) goto out; @@ -1787,9 +1815,10 @@ if (toep->tp_flags & TP_ABORT_RPL_PENDING) { INP_INFO_WLOCK(&tcbinfo); INP_LOCK(inp); - tcp_close(tp); + tp = tcp_close(tp); INP_INFO_WUNLOCK(&tcbinfo); - INP_UNLOCK(inp); + if (tp) + INP_UNLOCK(inp); } else enter_timewait(so); break; @@ -1802,9 +1831,10 @@ t3_release_offload_resources(so); INP_INFO_WLOCK(&tcbinfo); INP_LOCK(inp); - tcp_close(tp); + tp = tcp_close(tp); INP_INFO_WUNLOCK(&tcbinfo); - INP_UNLOCK(inp); + if (tp) + INP_UNLOCK(inp); break; case TCPS_FIN_WAIT_1: @@ -1817,7 +1847,7 @@ */ sowwakeup(so); sorwakeup(so); - + wakeup(&so->so_timeo); } else printf("FIN_WAIT1 shutdown handling incomplete\n"); @@ -1881,9 +1911,9 @@ !is_t3a(TOE_DEV(so))) { if (toep->tp_flags & TP_ABORT_REQ_RCVD) panic("TP_ABORT_REQ_RCVD set"); - t3_release_offload_resources(so); INP_INFO_WLOCK(&tcbinfo); INP_LOCK(tp->t_inpcb); + t3_release_offload_resources(so); tcp_close(tp); INP_INFO_WUNLOCK(&tcbinfo); INP_UNLOCK(tp->t_inpcb); @@ -2057,9 +2087,9 @@ */ if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { cleanup_syn_rcv_conn(child, parent); - t3_release_offload_resources(child); INP_INFO_WLOCK(&tcbinfo); INP_LOCK(inp); + t3_release_offload_resources(child); tcp_close(childtp); INP_INFO_WUNLOCK(&tcbinfo); INP_UNLOCK(inp); @@ -2136,14 +2166,14 @@ #endif /* * SYN_RECV needs special processing. If abort_syn_rcv() - * returns 0 is has taken care of the abort.2 + * returns 0 is has taken care of the abort. */ if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m)) return; - t3_release_offload_resources(so); INP_INFO_WLOCK(&tcbinfo); INP_LOCK(tp->t_inpcb); + t3_release_offload_resources(so); tcp_close(tp); INP_INFO_WUNLOCK(&tcbinfo); INP_UNLOCK(tp->t_inpcb); @@ -2385,6 +2415,7 @@ * entry already exists - free toepcb * and l2t */ + printf("syncache entry present\n"); toepcb_release(toep); break; case SC_DROP: @@ -2393,6 +2424,7 @@ * either it timed out, or it was evicted * we need to explicitly release the tid */ + printf("syncache entry dropped\n"); toepcb_release(toep); break; default: @@ -2990,6 +3022,8 @@ struct tom_data *d = TOM_DATA(tdev); struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; + + INP_LOCK(tp->t_inpcb); /* * It's OK if the TID is currently in use, the owning socket may have @@ -3002,6 +3036,7 @@ toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); socket_act_establish(so, m); + INP_UNLOCK(tp->t_inpcb); return 0; } @@ -3020,6 +3055,8 @@ int bytes = 0; DPRINTF("wr_ack: snd_una=%u credits=%d\n", snd_una, credits); + + INP_LOCK(tp->t_inpcb); toep->tp_wr_avail += credits; if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) @@ -3090,14 +3127,16 @@ } if (bytes) { DPRINTF("sbdrop(%d)\n", bytes); - - sbdrop(&so->so_snd, bytes); + SOCKBUF_LOCK(&so->so_snd); + sbdrop_locked(&so->so_snd, bytes); + sowwakeup_locked(so); } if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) t3_push_frames(so, 0); out_free: + INP_UNLOCK(tp->t_inpcb); m_free(m); }