From: Kip Macy <kmacy@FreeBSD.org>
Date: Tue, 4 Nov 2008 23:06:35 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r184664 - user/kmacy/HEAD_multi_tx/sys/netinet

Author: kmacy
Date: Tue Nov 4 23:06:35 2008
New Revision: 184664
URL: http://svn.freebsd.org/changeset/base/184664

Log:
  Commit rwatson's tcp_input locking changes

  http://www.watson.org/~robert/freebsd/netperf/20081104-tcp_input-rlock.diff

Modified:
  user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c

Modified: user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c
==============================================================================
--- user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c	Tue Nov 4 23:03:36 2008	(r184663)
+++ user/kmacy/HEAD_multi_tx/sys/netinet/tcp_input.c	Tue Nov 4 23:06:35 2008	(r184664)
@@ -158,6 +158,31 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet
     CTLFLAG_RW, tcp_autorcvbuf_max, 0,
     "Max size of automatic receive buffer");
 
+int	tcp_read_locking = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW,
+    &tcp_read_locking, 0, "Enable read locking strategy");
+
+int	tcp_rlock_atfirst;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rlock_atfirst, CTLFLAG_RD,
+    &tcp_rlock_atfirst, 0, "");
+
+int	tcp_wlock_atfirst;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_wlock_atfirst, CTLFLAG_RD,
+    &tcp_wlock_atfirst, 0, "");
+
+int	tcp_rlock_downgraded;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rlock_downgraded, CTLFLAG_RD,
+    &tcp_rlock_downgraded, 0, "");
+
+int	tcp_wlock_upgraded;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_upgraded, CTLFLAG_RD,
+    &tcp_wlock_upgraded, 0, "");
+
+int	tcp_wlock_looped;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_looped, CTLFLAG_RD,
+    &tcp_wlock_looped, 0, "");
+
+
 struct inpcbhead tcb;
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
@@ -288,6 +313,10 @@ tcp_input(struct mbuf *m, int off0)
 #endif
 	struct tcpopt to;		/* options in this segment */
 	char *s = NULL;			/* address and port logging */
+	int ti_locked;
+#define	TI_UNLOCKED	1
+#define	TI_RLOCKED	2
+#define	TI_WLOCKED	3
 
 #ifdef TCPDEBUG
 	/*
@@ -440,11 +469,40 @@ tcp_input(struct mbuf *m, int off0)
 	drop_hdrlen = off0 + off;
 
 	/*
-	 * Locate pcb for segment.
- */ - INP_INFO_WLOCK(&V_tcbinfo); + * Locate pcb for segment, which requires a lock on tcbinfo. + * + * If any TCP header flags are present that suggest a global state + * change may arise when processing the packet, we preemptively + * acquire a write lock, and otherwise a read lock. However, we may + * be wrong and also find that we can't upgrade from a read lock to + * a write lock. When this happens, we drop and re-acquire the lock + * as a write lock and loopback to findpcb. + */ + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tcp_read_locking == 0) { + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + tcp_wlock_atfirst++; + } else { + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; + tcp_rlock_atfirst++; + } + findpcb: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + break; + default: + panic("tcp_input: findpcb ti_locked %d\n", ti_locked); + break; + } + #ifdef IPFIREWALL_FORWARD /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. @@ -555,10 +613,40 @@ findpcb: * stray or duplicate segments arriving late. If this segment * was a legitimate new connection attempt the old INPCB gets * removed and we can try again to find a listening socket. + * + * Timewait processing currently always requires a write lock, so if + * we don't have one, acquire one. + * + * XXXRW: This duplicates logic below, and perhaps shouldn't. */ if (inp->inp_vflag & INP_TIMEWAIT) { + switch (ti_locked) { + case TI_RLOCKED: + if (rw_try_upgrade(&tcbinfo.ipi_lock) == 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&tcbinfo); + INP_INFO_WLOCK(&tcbinfo); + ti_locked = TI_WLOCKED; + tcp_wlock_looped++; + goto findpcb; + } + ti_locked = TI_WLOCKED; + tcp_wlock_upgraded++; + break; + + case TI_WLOCKED: + break; + + default: + panic("tcp_input: INP_TIMEWAIT ti_locked %d", + ti_locked); + } + + INP_INFO_WLOCK_ASSERT(&tcbinfo); + if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); + /* * NB: tcp_twcheck unlocks the INP and frees the mbuf. */ @@ -567,6 +655,7 @@ findpcb: INP_INFO_WUNLOCK(&V_tcbinfo); return; } + /* * The TCPCB may no longer exist if the connection is winding * down or it is in the CLOSED state. Either way we drop the @@ -578,6 +667,67 @@ findpcb: goto dropwithreset; } + /* + * Now that we've found an inpcb, we get to find out whether or not + * we needed a write lock. The rule we use once we have a + * connection is that the packet doesn't have a SYN/FIN/RST flag set, + * and that it's in the established state. This is somewhat + * conservative, but by the time we know for sure, we can no longer + * restart processing. + * + * The inpcb lock is required in order to check the connection state, + * but since it follows the pcbinfo lock, we have to try the upgrade + * and, if that fails, handle failure. Currently we do this by + * dropping all locks, re-acquiring the pcbinfo lock with a write + * lock, and looking up the inpcb again. This is very expensive and + * the trade-off here might not be right. 
+ */ + if (tp->t_state != TCPS_ESTABLISHED || + (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tcp_read_locking == 0) { + switch (ti_locked) { + case TI_RLOCKED: + if (rw_try_upgrade(&tcbinfo.ipi_lock) == 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&tcbinfo); + INP_INFO_WLOCK(&tcbinfo); + ti_locked = TI_WLOCKED; + tcp_wlock_looped++; + goto findpcb; + } + ti_locked = TI_WLOCKED; + tcp_wlock_upgraded++; + break; + + case TI_WLOCKED: + break; + + default: + panic("tcp_input: upgrade check ti_locked %d", + ti_locked); + } + INP_INFO_WLOCK_ASSERT(&tcbinfo); + } else { + /* + * It's possible that because of looping, we may have + * conservatively acquired a write lock. If so, downgrade. + */ + switch (ti_locked) { + case TI_RLOCKED: + break; + + case TI_WLOCKED: + tcp_rlock_downgraded++; + rw_downgrade(&tcbinfo.ipi_lock); + ti_locked = TI_RLOCKED; + break; + + default: + panic("tcp_input: downgrade check ti_locked %d", + ti_locked); + } + } + #ifdef MAC INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) @@ -895,8 +1045,20 @@ findpcb: return; dropwithreset: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - INP_INFO_WUNLOCK(&V_tcbinfo); + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); + break; + + default: + panic("tcp_input: dropwithreset ti_locked %d", ti_locked); + } if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); @@ -907,10 +1069,22 @@ dropwithreset: goto drop; dropunlock: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); + break; + + default: + panic("tcp_input: dropunlock ti_locked %d", ti_locked); + } if (inp != NULL) INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); drop: INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); @@ -926,10 +1100,10 @@ tcp_do_segment(struct mbuf *m, struct tc { INIT_VNET_INET(tp->t_vnet); int thflags, acked, ourfinisacked, needoutput = 0; - int headlocked = 1; int rstreason, todrop, win; u_long tiwin; struct tcpopt to; + int ti_locked; #ifdef TCPDEBUG /* @@ -942,7 +1116,35 @@ tcp_do_segment(struct mbuf *m, struct tc #endif thflags = th->th_flags; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + /* + * If this is either a state-changing packet or current state isn't + * established, we require a write lock on tcbinfo. Otherwise, we + * allow either a read lock or a write lock, as we may have acquired + * a write lock due to a race. + * + * XXXRW: if tcp_read_locking changes during processing, then we may + * reach here with a read lock even though we now want a write one, + * or a write lock where we might want a read one. We tolerate this + * by checking tcp_read_locking only in the case where we might get a + * read lock by accepting a write lock. We still need to set + * ti_locked so do that using rw_wowned(). This will go away once we + * remove the tcp_read_locking frob. 
+ */ + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tp->t_state != TCPS_ESTABLISHED) { + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + ti_locked = TI_WLOCKED; + } else { +#if 0 + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + ti_locked = TI_RLOCKED; +#endif + if (rw_wowned(&V_tcbinfo.ipi_lock)) + ti_locked = TI_WLOCKED; + else + ti_locked = TI_RLOCKED; + + } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); @@ -1073,6 +1275,20 @@ tcp_do_segment(struct mbuf *m, struct tc TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { /* + * By definition, the header-predicted case won't transition + * states, so we shouldn't hold a global write lock here. + * + * XXXRW: Due to tcp_read_locking, temporarily disable + * assertion. + */ +#if 0 + INP_INFO_RLOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_RLOCKED, + ("tcp_do_segment: header prediction ti_locked %d", + ti_locked)); +#endif + + /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest @@ -1096,14 +1312,19 @@ tcp_do_segment(struct mbuf *m, struct tc !IN_FASTRECOVERY(tp) && (to.to_flags & TOF_SACK) == 0 && TAILQ_EMPTY(&tp->snd_holes)))) { - KASSERT(headlocked, - ("%s: headlocked", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; /* * This is a pure ack for outstanding data. + * + * XXXRW: temporarily tolerate a write lock + * here. */ + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; ++V_tcpstat.tcps_predack; + /* * "bad retransmit" recovery. */ @@ -1190,14 +1411,19 @@ tcp_do_segment(struct mbuf *m, struct tc tlen <= sbspace(&so->so_rcv)) { int newsize = 0; /* automatic sockbuf scaling */ - KASSERT(headlocked, ("%s: headlocked", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; /* * This is a pure, in-sequence data packet * with nothing on the reassembly queue and * we have enough buffer space to take it. + * + * XXXRW: Temporarily tolerate write lock here. 
*/ + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&tcbinfo); + else + INP_INFO_WUNLOCK(&tcbinfo); + ti_locked = TI_UNLOCKED; + /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); @@ -1424,8 +1650,9 @@ tcp_do_segment(struct mbuf *m, struct tc tp->t_state = TCPS_SYN_RECEIVED; } - KASSERT(headlocked, ("%s: trimthenstep6: head not locked", - __func__)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_WLOCKED, + ("trimthenstep6: ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -1553,17 +1780,23 @@ tcp_do_segment(struct mbuf *m, struct tc case TCPS_CLOSE_WAIT: so->so_error = ECONNRESET; close: + KASSERT(ti_locked == TI_WLOCKED, + ("tcp_do_segment: TH_RST 1 ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); + tp->t_state = TCPS_CLOSED; V_tcpstat.tcps_drops++; - KASSERT(headlocked, ("%s: trimthenstep6: " - "tcp_close: head not locked", __func__)); tp = tcp_close(tp); break; case TCPS_CLOSING: case TCPS_LAST_ACK: - KASSERT(headlocked, ("%s: trimthenstep6: " - "tcp_close.2: head not locked", __func__)); + KASSERT(ti_locked == TI_WLOCKED, + ("tcp_do_segment: TH_RST 2 ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); + tp = tcp_close(tp); break; } @@ -1668,8 +1901,11 @@ tcp_do_segment(struct mbuf *m, struct tc tp->t_state > TCPS_CLOSE_WAIT && tlen) { char *s; - KASSERT(headlocked, ("%s: trimthenstep6: tcp_close.3: head " - "not locked", __func__)); + KASSERT(ti_locked == TI_WLOCKED, ("tcp_do_segment: " + "SS_NOFDEREF && CLOSE_WAIT && tlen ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket " "was closed, sending RST and removing tcpcb\n", @@ -1741,8 +1977,10 @@ tcp_do_segment(struct mbuf *m, struct tc * error and we send an RST and drop the connection. 
*/ if (thflags & TH_SYN) { - KASSERT(headlocked, ("%s: tcp_drop: trimthenstep6: " - "head not locked", __func__)); + KASSERT(ti_locked == TI_WLOCKED, + ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); + tp = tcp_drop(tp, ECONNRESET); rstreason = BANDLIM_UNLIMITED; goto drop; @@ -2029,8 +2267,9 @@ tcp_do_segment(struct mbuf *m, struct tc } process_ACK: - KASSERT(headlocked, ("%s: process_ACK: head not locked", - __func__)); + INP_INFO_LOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_input: process_ACK ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); acked = th->th_ack - tp->snd_una; @@ -2187,11 +2426,9 @@ process_ACK: */ case TCPS_CLOSING: if (ourfinisacked) { - KASSERT(headlocked, ("%s: process_ACK: " - "head not locked", __func__)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; m_freem(m); return; } @@ -2205,8 +2442,7 @@ process_ACK: */ case TCPS_LAST_ACK: if (ourfinisacked) { - KASSERT(headlocked, ("%s: process_ACK: " - "tcp_close: head not locked", __func__)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); tp = tcp_close(tp); goto drop; } @@ -2215,7 +2451,9 @@ process_ACK: } step6: - KASSERT(headlocked, ("%s: step6: head not locked", __func__)); + INP_INFO_LOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_do_segment: step6 ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2301,7 +2539,9 @@ step6: tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ - KASSERT(headlocked, ("%s: dodata: head not locked", __func__)); + INP_INFO_LOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_do_segment: dodata ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2420,15 +2660,32 @@ dodata: /* XXX */ * standard timers. */ case TCPS_FIN_WAIT_2: - KASSERT(headlocked == 1, ("%s: dodata: " - "TCP_FIN_WAIT_2: head not locked", __func__)); + INP_INFO_WLOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_WLOCKED, ("tcp_do_segment: " + "dodata TCP_FIN_WAIT_2 ti_locked: %d", + ti_locked)); + tcp_twstart(tp); INP_INFO_WUNLOCK(&V_tcbinfo); return; } } - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); + break; + + default: + panic("tco_do_setment: dodata epilogue ti_locked %d", + ti_locked); + } + ti_locked = TI_UNLOCKED; #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, @@ -2442,10 +2699,11 @@ dodata: /* XXX */ (void) tcp_output(tp); check_delack: - KASSERT(headlocked == 0, ("%s: check_delack: head locked", - __func__)); + KASSERT(ti_locked == TI_UNLOCKED, ("tcp_do_segment: check_delack " + "ti_locked %d", ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); + if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); @@ -2454,7 +2712,9 @@ check_delack: return; dropafterack: - KASSERT(headlocked, ("%s: dropafterack: head not locked", __func__)); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_do_segment: dropafterack ti_locked %d", ti_locked)); + /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. 
@@ -2481,8 +2741,22 @@ dropafterack: tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif - KASSERT(headlocked, ("%s: headlocked should be 1", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); + break; + + default: + panic("tcp_do_sgement: dropafterack epilogue ti_locked %d", + ti_locked); + } + ti_locked = TI_UNLOCKED; tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); @@ -2490,8 +2764,21 @@ dropafterack: return; dropwithreset: - KASSERT(headlocked, ("%s: dropwithreset: head not locked", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); + break; + + default: + panic("tcp_do_segment: dropwithreset ti_locked %d", + ti_locked); + } if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); @@ -2501,6 +2788,22 @@ dropwithreset: return; drop: + switch (ti_locked) { + case TI_RLOCKED: + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); + break; + + case TI_WLOCKED: + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); + break; + + case TI_UNLOCKED: + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + break; + } + /* * Drop space held by incoming segment and return. */ @@ -2511,8 +2814,6 @@ drop: #endif if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); - if (headlocked) - INP_INFO_WUNLOCK(&V_tcbinfo); m_freem(m); } @@ -2683,7 +2984,7 @@ tcp_pulloutofband(struct socket *so, str char *cp = mtod(m, caddr_t) + cnt; struct tcpcb *tp = sototcpcb(so); - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); tp->t_iobc = *cp; tp->t_oobflags |= TCPOOB_HAVEDATA; @@ -2830,7 +3131,7 @@ tcp_mss_update(struct tcpcb *tp, int off const size_t min_protoh = sizeof(struct tcpiphdr); #endif - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); /* Initialize. */ #ifdef INET6 @@ -3144,7 +3445,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp tcp_seq onxt = tp->snd_nxt; u_long ocwnd = tp->snd_cwnd; - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0;
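
For readers following the diff, the strategy it implements is: tcp_input() now takes the global tcbinfo lock read-only up front unless the segment carries SYN, FIN or RST (or net.inet.tcp.read_locking is 0); once the inpcb is found it re-checks, upgrading to a write lock with rw_try_upgrade() if the connection is not established or timewait processing is required, and downgrading with rw_downgrade() if a conservatively taken write lock turns out to be unnecessary. If an upgrade attempt fails, all locks are dropped, tcbinfo is re-acquired as a writer and control loops back to findpcb, since the inpcb found earlier may no longer be valid. The new counters (tcp_rlock_atfirst, tcp_wlock_atfirst, tcp_rlock_downgraded, tcp_wlock_upgraded, tcp_wlock_looped) instrument how often each path is taken.

The sketch below restates that pattern as a small, self-contained userland program, purely as an illustration. It uses POSIX rwlocks rather than the kernel's rwlock(9), and every identifier in it (conn_table_lock, struct conn, lookup_conn, needs_global_write, input_segment) is invented for the example. Because pthreads has no read-to-write upgrade primitive, the fallback always drops the read lock, takes the write lock and redoes the lookup, which corresponds to the tcp_wlock_looped path in the patch.

/*
 * Minimal userland sketch of the locking pattern above, for illustration
 * only.  All names here are invented and are not FreeBSD interfaces.
 * POSIX rwlocks have no read-to-write upgrade, so the fallback below
 * always drops the read lock, takes the write lock and redoes the
 * lookup -- the path counted by tcp_wlock_looped in the patch.
 */
#include <pthread.h>
#include <stdio.h>

enum ti_lock_state { TI_UNLOCKED, TI_RLOCKED, TI_WLOCKED };

static pthread_rwlock_t conn_table_lock = PTHREAD_RWLOCK_INITIALIZER;

struct conn {
	int established;	/* stand-in for tp->t_state == TCPS_ESTABLISHED */
};

static struct conn the_conn = { 1 };

/* Stand-in for the inpcb lookup: find the connection for a segment. */
static struct conn *
lookup_conn(void)
{

	return (&the_conn);
}

/* Could this segment change global connection state (SYN/FIN/RST)? */
static int
needs_global_write(int syn_fin_rst)
{

	return (syn_fin_rst != 0);
}

static void
input_segment(int syn_fin_rst)
{
	enum ti_lock_state ti_locked;
	struct conn *c;

	/* Optimistically take only a read lock for ordinary segments. */
	if (needs_global_write(syn_fin_rst)) {
		pthread_rwlock_wrlock(&conn_table_lock);
		ti_locked = TI_WLOCKED;
	} else {
		pthread_rwlock_rdlock(&conn_table_lock);
		ti_locked = TI_RLOCKED;
	}

findpcb:
	c = lookup_conn();

	/*
	 * If the guess was wrong (connection not established), we need the
	 * write lock after all.  With no upgrade primitive we drop the read
	 * lock, take the write lock and redo the lookup, since the table
	 * may have changed while no lock was held.
	 */
	if (!c->established && ti_locked == TI_RLOCKED) {
		pthread_rwlock_unlock(&conn_table_lock);
		pthread_rwlock_wrlock(&conn_table_lock);
		ti_locked = TI_WLOCKED;
		goto findpcb;
	}

	/* ... per-connection processing happens here ... */
	printf("processed segment under %s lock\n",
	    ti_locked == TI_WLOCKED ? "write" : "read");

	pthread_rwlock_unlock(&conn_table_lock);
	ti_locked = TI_UNLOCKED;
	(void)ti_locked;
}

int
main(void)
{

	input_segment(0);		/* established, no flags: read lock */
	input_segment(1);		/* SYN/FIN/RST: write lock up front */
	the_conn.established = 0;
	input_segment(0);		/* wrong guess: drop, relock, retry */
	return (0);
}

Build with something like "cc -pthread sketch.c". The third call shows why the retry loop is needed: the read-lock guess is wrong, and because the table could have changed while no lock was held, the lookup has to be repeated under the write lock rather than reusing the earlier result.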