Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 24 Nov 2016 14:48:46 +0000 (UTC)
From:      Julien Charbon <jch@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r309108 - in stable/10/sys: dev/cxgb/ulp/tom dev/cxgbe/tom netinet netinet6
Message-ID:  <201611241448.uAOEmkk9078800@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jch
Date: Thu Nov 24 14:48:46 2016
New Revision: 309108
URL: https://svnweb.freebsd.org/changeset/base/309108

Log:
  MFC r286227, r286443:
  
  r286227:
  
  Decompose the TCP INP_INFO lock to improve the scalability of short-lived
  TCP connections:
  
  - The existing TCP INP_INFO lock continues to protect the global inpcb list
    stability during full list traversal (e.g. tcp_pcblist()).
  
  - A new INP_LIST lock protects actual modifications of the inpcb list (inp
    allocation and free) and the global inpcb counters.
  
  This allows the TCP INP_INFO lock to be taken as a read lock (INP_INFO_RLOCK)
  in critical paths (e.g. tcp_input()), with INP_INFO_WLOCK needed only in
  occasional operations that walk all connections.
  
  PR:			183659
  Differential Revision:	https://reviews.freebsd.org/D2599
  Reviewed by:		jhb, adrian
  Tested by:		adrian, nitroboost-gmail.com
  Sponsored by:		Verisign, Inc.
  
  r286443:
  
  Fix a kernel assertion issue introduced with r286227:
  avoid overly strict INP_INFO_RLOCK_ASSERT checks, because
  tcp_notify() can be called from in6_pcbnotify().
  
  Reported by:		Larry Rosenman <ler@lerctr.org>
  Submitted by:		markj, jch

Modified:
  stable/10/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
  stable/10/sys/dev/cxgb/ulp/tom/cxgb_listen.c
  stable/10/sys/dev/cxgbe/tom/t4_connect.c
  stable/10/sys/dev/cxgbe/tom/t4_cpl_io.c
  stable/10/sys/dev/cxgbe/tom/t4_listen.c
  stable/10/sys/netinet/in_pcb.c
  stable/10/sys/netinet/in_pcb.h
  stable/10/sys/netinet/tcp_input.c
  stable/10/sys/netinet/tcp_subr.c
  stable/10/sys/netinet/tcp_syncache.c
  stable/10/sys/netinet/tcp_timer.c
  stable/10/sys/netinet/tcp_timewait.c
  stable/10/sys/netinet/tcp_usrreq.c
  stable/10/sys/netinet/toecore.c
  stable/10/sys/netinet6/in6_pcb.c

Modified: stable/10/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
==============================================================================
--- stable/10/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -638,7 +638,7 @@ t3_send_fin(struct toedev *tod, struct t
 	unsigned int tid = toep->tp_tid;
 #endif
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
@@ -924,12 +924,12 @@ do_act_open_rpl(struct sge_qset *qs, str
 
 	rc = act_open_rpl_status_to_errno(s);
 	if (rc != EAGAIN)
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	toe_connect_failed(tod, inp, rc);
 	toepcb_release(toep);	/* unlocks inp */
 	if (rc != EAGAIN)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	m_freem(m);
 	return (0);
@@ -1060,7 +1060,7 @@ send_reset(struct toepcb *toep)
 	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
@@ -1171,12 +1171,12 @@ do_rx_data(struct sge_qset *qs, struct r
 		SOCKBUF_UNLOCK(so_rcv);
 		INP_WUNLOCK(inp);
 
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		if (tp)
 			INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		m_freem(m);
 		return (0);
@@ -1221,7 +1221,7 @@ do_peer_close(struct sge_qset *qs, struc
 	struct tcpcb *tp;
 	struct socket *so;
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
@@ -1249,7 +1249,7 @@ do_peer_close(struct sge_qset *qs, struc
 	case TCPS_FIN_WAIT_2:
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		toepcb_release(toep);	/* no more CPLs expected */
@@ -1263,7 +1263,7 @@ do_peer_close(struct sge_qset *qs, struc
 
 done:
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	m_freem(m);
 	return (0);
@@ -1284,7 +1284,7 @@ do_close_con_rpl(struct sge_qset *qs, st
 	struct tcpcb *tp;
 	struct socket *so;
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
@@ -1302,7 +1302,7 @@ do_close_con_rpl(struct sge_qset *qs, st
 		tcp_twstart(tp);
 release:
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		toepcb_release(toep);	/* no more CPLs expected */
@@ -1327,7 +1327,7 @@ release:
 
 done:
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	m_freem(m);
 	return (0);
@@ -1488,7 +1488,7 @@ do_abort_req(struct sge_qset *qs, struct
 		return (do_abort_req_synqe(qs, r, m));
 
 	inp = toep->tp_inp;
-	INP_INFO_WLOCK(&V_tcbinfo);	/* for tcp_close */
+	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
 	INP_WLOCK(inp);
 
 	tp = intotcpcb(inp);
@@ -1502,7 +1502,7 @@ do_abort_req(struct sge_qset *qs, struct
 		toep->tp_flags |= TP_ABORT_REQ_RCVD;
 		toep->tp_flags |= TP_ABORT_SHUTDOWN;
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		m_freem(m);
 		return (0);
 	}
@@ -1522,7 +1522,7 @@ do_abort_req(struct sge_qset *qs, struct
 			INP_WLOCK(inp);	/* re-acquire */
 		toepcb_release(toep);	/* no more CPLs expected */
 	}
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	send_abort_rpl(tod, tid, qset);
 	m_freem(m);

Modified: stable/10/sys/dev/cxgb/ulp/tom/cxgb_listen.c
==============================================================================
--- stable/10/sys/dev/cxgb/ulp/tom/cxgb_listen.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/dev/cxgb/ulp/tom/cxgb_listen.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -553,11 +553,11 @@ do_pass_accept_req(struct sge_qset *qs, 
 			REJECT_PASS_ACCEPT();	/* no l2te, or ifp mismatch */
 	}
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 
 	/* Don't offload if the 4-tuple is already in use */
 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		REJECT_PASS_ACCEPT();
 	}
 
@@ -570,7 +570,7 @@ do_pass_accept_req(struct sge_qset *qs, 
 		 * resources tied to this listen context.
 		 */
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		REJECT_PASS_ACCEPT();
 	}
 	so = inp->inp_socket;
@@ -698,7 +698,7 @@ do_pass_establish(struct sge_qset *qs, s
 	struct toepcb *toep;
 	struct socket *so;
 	struct listen_ctx *lctx = synqe->lctx;
-	struct inpcb *inp = lctx->inp;
+	struct inpcb *inp = lctx->inp, *new_inp;
 	struct tcpopt to;
 	struct tcphdr th;
 	struct in_conninfo inc;
@@ -712,7 +712,7 @@ do_pass_establish(struct sge_qset *qs, s
 	KASSERT(qs->idx == synqe->qset,
 	    ("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
 
-	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
+	INP_INFO_RLOCK(&V_tcbinfo);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
@@ -726,7 +726,7 @@ do_pass_establish(struct sge_qset *qs, s
 		    ("%s: listen socket dropped but tid %u not aborted.",
 		    __func__, tid));
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		m_freem(m);
 		return (0);
 	}
@@ -742,7 +742,7 @@ do_pass_establish(struct sge_qset *qs, s
 reset:
 		t3_send_reset_synqe(tod, synqe);
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		m_freem(m);
 		return (0);
 	}
@@ -760,21 +760,23 @@ reset:
 		goto reset;
 	}
 
-	if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
-		struct inpcb *new_inp = sotoinpcb(so);
+	/* New connection inpcb is already locked by syncache_expand(). */
+	new_inp = sotoinpcb(so);
+	INP_WLOCK_ASSERT(new_inp);
 
-		INP_WLOCK(new_inp);
+	if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
 		t3_offload_socket(tod, synqe, so);
-		INP_WUNLOCK(new_inp);
 	}
 
+	INP_WUNLOCK(new_inp);
+
 	/* Remove the synq entry and release its reference on the lctx */
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(td, lctx);
 	if (inp)
 		INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	release_synqe(synqe);
 
 	m_freem(m);
@@ -1140,7 +1142,7 @@ t3_offload_socket(struct toedev *tod, vo
 	struct cpl_pass_establish *cpl = synqe->cpl;
 	struct toepcb *toep = synqe->toep;
 
-	INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
 	INP_WLOCK_ASSERT(inp);
 
 	offload_socket(so, toep);

Modified: stable/10/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- stable/10/sys/dev/cxgbe/tom/t4_connect.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/dev/cxgbe/tom/t4_connect.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -179,12 +179,12 @@ act_open_failure_cleanup(struct adapter 
 	toep->tid = -1;
 
 	if (status != EAGAIN)
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	toe_connect_failed(tod, inp, status);
 	final_cpl_received(toep);	/* unlocks inp */
 	if (status != EAGAIN)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 static int

Modified: stable/10/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- stable/10/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -1086,7 +1086,7 @@ do_peer_close(struct sge_iq *iq, const s
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
@@ -1128,7 +1128,7 @@ do_peer_close(struct sge_iq *iq, const s
 	case TCPS_FIN_WAIT_2:
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);
@@ -1140,7 +1140,7 @@ do_peer_close(struct sge_iq *iq, const s
 	}
 done:
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (0);
 }
 
@@ -1167,7 +1167,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
@@ -1185,7 +1185,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
 		tcp_twstart(tp);
 release:
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);	/* no more CPLs expected */
@@ -1209,7 +1209,7 @@ release:
 	}
 done:
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (0);
 }
 
@@ -1368,7 +1368,7 @@ do_abort_req(struct sge_iq *iq, const st
 	}
 
 	inp = toep->inp;
-	INP_INFO_WLOCK(&V_tcbinfo);	/* for tcp_close */
+	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
 	INP_WLOCK(inp);
 
 	tp = intotcpcb(inp);
@@ -1402,7 +1402,7 @@ do_abort_req(struct sge_iq *iq, const st
 
 	final_cpl_received(toep);
 done:
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
@@ -1520,12 +1520,12 @@ do_rx_data(struct sge_iq *iq, const stru
 		SOCKBUF_UNLOCK(sb);
 		INP_WUNLOCK(inp);
 
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		if (tp)
 			INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		return (0);
 	}

Modified: stable/10/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- stable/10/sys/dev/cxgbe/tom/t4_listen.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/dev/cxgbe/tom/t4_listen.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -931,7 +931,7 @@ t4_offload_socket(struct toedev *tod, vo
 	struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
 	struct toepcb *toep = *(struct toepcb **)(cpl + 1);
 
-	INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: %p not a synq_entry?", __func__, arg));
@@ -1332,15 +1332,15 @@ found:
 		REJECT_PASS_ACCEPT();
 	rpl = wrtod(wr);
 
-	INP_INFO_WLOCK(&V_tcbinfo);	/* for 4-tuple check */
+	INP_INFO_RLOCK(&V_tcbinfo);	/* for 4-tuple check */
 
 	/* Don't offload if the 4-tuple is already in use */
 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		free(wr, M_CXGBE);
 		REJECT_PASS_ACCEPT();
 	}
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	inp = lctx->inp;		/* listening socket, not owned by TOE */
 	INP_WLOCK(inp);
@@ -1398,7 +1398,7 @@ found:
 	hold_synqe(synqe);	/* hold for the duration it's in the synq */
 	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */
 
-	/*
+        /*
 	 * If all goes well t4_syncache_respond will get called during
 	 * syncache_add.  Note that syncache_add releases the pcb lock.
 	 */
@@ -1516,7 +1516,7 @@ do_pass_establish(struct sge_iq *iq, con
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
-	struct inpcb *inp = lctx->inp;
+	struct inpcb *inp = lctx->inp, *new_inp;
 	struct socket *so;
 	struct tcphdr th;
 	struct tcpopt to;
@@ -1534,7 +1534,7 @@ do_pass_establish(struct sge_iq *iq, con
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
 
-	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
+	INP_INFO_RLOCK(&V_tcbinfo);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
 	CTR6(KTR_CXGBE,
@@ -1550,7 +1550,7 @@ do_pass_establish(struct sge_iq *iq, con
 		}
 
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (0);
 	}
 
@@ -1575,7 +1575,7 @@ reset:
 		 */
 		send_reset_synqe(TOEDEV(ifp), synqe);
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (0);
 	}
 	toep->tid = tid;
@@ -1609,6 +1609,10 @@ reset:
 		goto reset;
 	}
 
+	/* New connection inpcb is already locked by syncache_expand(). */
+	new_inp = sotoinpcb(so);
+	INP_WLOCK_ASSERT(new_inp);
+
 	/*
 	 * This is for the unlikely case where the syncache entry that we added
 	 * has been evicted from the syncache, but the syncache_expand above
@@ -1619,20 +1623,18 @@ reset:
 	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
 	 */
 	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
-		struct inpcb *new_inp = sotoinpcb(so);
-
-		INP_WLOCK(new_inp);
 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
 		t4_offload_socket(TOEDEV(ifp), synqe, so);
-		INP_WUNLOCK(new_inp);
 	}
 
+	INP_WUNLOCK(new_inp);
+
 	/* Done with the synqe */
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(sc, lctx);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	release_synqe(synqe);
 
 	return (0);

Modified: stable/10/sys/netinet/in_pcb.c
==============================================================================
--- stable/10/sys/netinet/in_pcb.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/netinet/in_pcb.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -215,6 +215,7 @@ in_pcbinfo_init(struct inpcbinfo *pcbinf
 
 	INP_INFO_LOCK_INIT(pcbinfo, name);
 	INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash");	/* XXXRW: argument? */
+	INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
 #ifdef VIMAGE
 	pcbinfo->ipi_vnet = curvnet;
 #endif
@@ -253,6 +254,7 @@ in_pcbinfo_destroy(struct inpcbinfo *pcb
 	in_pcbgroup_destroy(pcbinfo);
 #endif
 	uma_zdestroy(pcbinfo->ipi_zone);
+	INP_LIST_LOCK_DESTROY(pcbinfo);
 	INP_HASH_LOCK_DESTROY(pcbinfo);
 	INP_INFO_LOCK_DESTROY(pcbinfo);
 }
@@ -267,7 +269,14 @@ in_pcballoc(struct socket *so, struct in
 	struct inpcb *inp;
 	int error;
 
-	INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+	if (pcbinfo == &V_tcbinfo) {
+		INP_INFO_RLOCK_ASSERT(pcbinfo);
+	} else {
+		INP_INFO_WLOCK_ASSERT(pcbinfo);
+	}
+#endif
+
 	error = 0;
 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
@@ -299,6 +308,8 @@ in_pcballoc(struct socket *so, struct in
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 	}
 #endif
+	INP_WLOCK(inp);
+	INP_LIST_WLOCK(pcbinfo);
 	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
 	pcbinfo->ipi_count++;
 	so->so_pcb = (caddr_t)inp;
@@ -306,9 +317,9 @@ in_pcballoc(struct socket *so, struct in
 	if (V_ip6_auto_flowlabel)
 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
-	INP_WLOCK(inp);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	refcount_init(&inp->inp_refcount, 1);	/* Reference from inpcbinfo */
+	INP_LIST_WUNLOCK(pcbinfo);
 #if defined(IPSEC) || defined(MAC)
 out:
 	if (error != 0) {
@@ -1194,7 +1205,13 @@ in_pcbfree(struct inpcb *inp)
 
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
-	INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+	if (pcbinfo == &V_tcbinfo) {
+		INP_INFO_LOCK_ASSERT(pcbinfo);
+	} else {
+		INP_INFO_WLOCK_ASSERT(pcbinfo);
+	}
+#endif
 	INP_WLOCK_ASSERT(inp);
 
 	/* XXXRW: Do as much as possible here. */
@@ -1202,8 +1219,10 @@ in_pcbfree(struct inpcb *inp)
 	if (inp->inp_sp != NULL)
 		ipsec_delete_pcbpolicy(inp);
 #endif
+	INP_LIST_WLOCK(pcbinfo);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	in_pcbremlists(inp);
+	INP_LIST_WUNLOCK(pcbinfo);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		ip6_freepcbopts(inp->in6p_outputopts);
@@ -1360,7 +1379,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo
 	struct ip_moptions *imo;
 	int i, gap;
 
-	INP_INFO_RLOCK(pcbinfo);
+	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		imo = inp->inp_moptions;
@@ -1390,7 +1409,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo
 		}
 		INP_WUNLOCK(inp);
 	}
-	INP_INFO_RUNLOCK(pcbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
@@ -2023,8 +2042,16 @@ in_pcbremlists(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
-	INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+	if (pcbinfo == &V_tcbinfo) {
+		INP_INFO_RLOCK_ASSERT(pcbinfo);
+	} else {
+		INP_INFO_WLOCK_ASSERT(pcbinfo);
+	}
+#endif
+
 	INP_WLOCK_ASSERT(inp);
+	INP_LIST_WLOCK_ASSERT(pcbinfo);
 
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	if (inp->inp_flags & INP_INHASHLIST) {
@@ -2169,13 +2196,13 @@ inp_apply_all(void (*func)(struct inpcb 
 {
 	struct inpcb *inp;
 
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_WLOCK(&V_tcbinfo);
 	LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		func(inp, arg);
 		INP_WUNLOCK(inp);
 	}
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 }
 
 struct socket *

Modified: stable/10/sys/netinet/in_pcb.h
==============================================================================
--- stable/10/sys/netinet/in_pcb.h	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/netinet/in_pcb.h	Thu Nov 24 14:48:46 2016	(r309108)
@@ -128,23 +128,35 @@ struct in_conninfo {
 struct	icmp6_filter;
 
 /*-
- * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
- * and IPv6 sockets.  In the case of TCP, further per-connection state is
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
+ * IPv6 sockets.  In the case of TCP and UDP, further per-connection state is
  * hung off of inp_ppcb most of the time.  Almost all fields of struct inpcb
  * are static after creation or protected by a per-inpcb rwlock, inp_lock.  A
- * few fields also require the global pcbinfo lock for the inpcb to be held,
- * when modified, such as the global connection lists and hashes, as well as
- * binding information (which affects which hash a connection is on).  This
- * model means that connections can be looked up without holding the
- * per-connection lock, which is important for performance when attempting to
- * find the connection for a packet given its IP and port tuple.  Writing to
- * these fields that write locks be held on both the inpcb and global locks.
+ * few fields are protected by multiple locks as indicated in the locking notes
+ * below.  For these fields, all of the listed locks must be write-locked for
+ * any modifications.  However, these fields can be safely read while any one of
+ * the listed locks are read-locked.  This model can permit greater concurrency
+ * for read operations.  For example, connections can be looked up while only
+ * holding a read lock on the global pcblist lock.  This is important for
+ * performance when attempting to find the connection for a packet given its IP
+ * and port tuple.
+ *
+ * One noteworthy exception is that the global pcbinfo lock follows a different
+ * set of rules in relation to the inp_list field.  Rather than being
+ * write-locked for modifications and read-locked for list iterations, it must
+ * be read-locked during modifications and write-locked during list iterations.
+ * This ensures that the relatively rare global list iterations safely walk a
+ * stable snapshot of connections while allowing more common list modifications
+ * to safely grab the pcblist lock just while adding or removing a connection
+ * from the global list.
  *
  * Key:
  * (c) - Constant after initialization
  * (g) - Protected by the pcbgroup lock
  * (i) - Protected by the inpcb lock
  * (p) - Protected by the pcbinfo lock for the inpcb
+ * (l) - Protected by the pcblist lock for the inpcb
+ * (h) - Protected by the pcbhash lock for the inpcb
  * (s) - Protected by another subsystem's locks
  * (x) - Undefined locking
  *
@@ -159,15 +171,21 @@ struct	icmp6_filter;
  * socket has been freed), or there may be close(2)-related races.
  *
  * The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ *
+ * TODO:  Currently only the TCP stack is leveraging the global pcbinfo lock
+ * read-lock usage during modification, this model can be applied to other
+ * protocols (especially SCTP).
  */
 struct inpcb {
-	LIST_ENTRY(inpcb) inp_hash;	/* (i/p) hash list */
+	LIST_ENTRY(inpcb) inp_hash;	/* (h/i) hash list */
 	LIST_ENTRY(inpcb) inp_pcbgrouphash;	/* (g/i) hash list */
-	LIST_ENTRY(inpcb) inp_list;	/* (i/p) list for all PCBs for proto */
+	LIST_ENTRY(inpcb) inp_list;	/* (p/l) list for all PCBs for proto */
+	                                /* (p[w]) for list iteration */
+	                                /* (p[r]/l) for addition/removal */
 	void	*inp_ppcb;		/* (i) pointer to per-protocol pcb */
 	struct	inpcbinfo *inp_pcbinfo;	/* (c) PCB list info */
 	struct	inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
-	LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
+	LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
 	struct	socket *inp_socket;	/* (i) back pointer to socket */
 	struct	ucred	*inp_cred;	/* (c) cache of socket cred */
 	u_int32_t inp_flow;		/* (i) IPv6 flow information */
@@ -185,7 +203,7 @@ struct inpcb {
 					 *     general use */
 
 	/* Local and foreign ports, local and foreign addr. */
-	struct	in_conninfo inp_inc;	/* (i/p) list for PCB's local port */
+	struct	in_conninfo inp_inc;	/* (i) list for PCB's local port */
 
 	/* MAC and IPSEC policy information. */
 	struct	label *inp_label;	/* (i) MAC label */
@@ -210,8 +228,8 @@ struct inpcb {
 		int	inp6_cksum;
 		short	inp6_hops;
 	} inp_depend6;
-	LIST_ENTRY(inpcb) inp_portlist;	/* (i/p) */
-	struct	inpcbport *inp_phd;	/* (i/p) head of this list */
+	LIST_ENTRY(inpcb) inp_portlist;	/* (i/h) */
+	struct	inpcbport *inp_phd;	/* (i/h) head of this list */
 #define inp_zero_size offsetof(struct inpcb, inp_gencnt)
 	inp_gen_t	inp_gencnt;	/* (c) generation count */
 	struct llentry	*inp_lle;	/* cached L2 information */
@@ -275,37 +293,46 @@ struct inpcbport {
  * Global data structure for each high-level protocol (UDP, TCP, ...) in both
  * IPv4 and IPv6.  Holds inpcb lists and information for managing them.
  *
- * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
- * the former covering mutable global fields (such as the global pcb list),
- * and the latter covering the hashed lookup tables.  The lock order is:
+ * Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
+ * ipi_list_lock:
+ *  - ipi_lock covering the global pcb list stability during loop iteration,
+ *  - ipi_hash_lock covering the hashed lookup tables,
+ *  - ipi_list_lock covering mutable global fields (such as the global
+ *    pcb list)
+ *
+ * The lock order is:
  *
- *    ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ *    ipi_lock (before)
+ *        inpcb locks (before)
+ *            ipi_list locks (before)
+ *                {ipi_hash_lock, pcbgroup locks}
  *
  * Locking key:
  *
  * (c) Constant or nearly constant after initialisation
  * (g) Locked by ipi_lock
+ * (l) Locked by ipi_list_lock
  * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
  * (p) Protected by one or more pcbgroup locks
  * (x) Synchronisation properties poorly defined
  */
 struct inpcbinfo {
 	/*
-	 * Global lock protecting global inpcb list, inpcb count, etc.
+	 * Global lock protecting full inpcb list traversal
 	 */
 	struct rwlock		 ipi_lock;
 
 	/*
 	 * Global list of inpcbs on the protocol.
 	 */
-	struct inpcbhead	*ipi_listhead;		/* (g) */
-	u_int			 ipi_count;		/* (g) */
+	struct inpcbhead	*ipi_listhead;		/* (g/l) */
+	u_int			 ipi_count;		/* (l) */
 
 	/*
 	 * Generation count -- incremented each time a connection is allocated
 	 * or freed.
 	 */
-	u_quad_t		 ipi_gencnt;		/* (g) */
+	u_quad_t		 ipi_gencnt;		/* (l) */
 
 	/*
 	 * Fields associated with port lookup and allocation.
@@ -363,6 +390,11 @@ struct inpcbinfo {
 	 * general use 2
 	 */
 	void 			*ipi_pspare[2];
+
+	/*
+	 * Global lock protecting global inpcb list, inpcb count, etc.
+	 */
+	struct rwlock		 ipi_list_lock;
 };
 
 #ifdef _KERNEL
@@ -455,6 +487,7 @@ short	inp_so_options(const struct inpcb 
 #define INP_INFO_TRY_RLOCK(ipi)	rw_try_rlock(&(ipi)->ipi_lock)
 #define INP_INFO_TRY_WLOCK(ipi)	rw_try_wlock(&(ipi)->ipi_lock)
 #define INP_INFO_TRY_UPGRADE(ipi)	rw_try_upgrade(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi)	rw_wowned(&(ipi)->ipi_lock)
 #define INP_INFO_RUNLOCK(ipi)	rw_runlock(&(ipi)->ipi_lock)
 #define INP_INFO_WUNLOCK(ipi)	rw_wunlock(&(ipi)->ipi_lock)
 #define	INP_INFO_LOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
@@ -462,6 +495,25 @@ short	inp_so_options(const struct inpcb 
 #define INP_INFO_WLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
 #define INP_INFO_UNLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
 
+#define INP_LIST_LOCK_INIT(ipi, d) \
+        rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
+#define INP_LIST_LOCK_DESTROY(ipi)  rw_destroy(&(ipi)->ipi_list_lock)
+#define INP_LIST_RLOCK(ipi)     rw_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WLOCK(ipi)     rw_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_UPGRADE(ipi)       rw_try_upgrade(&(ipi)->ipi_list_lock)
+#define INP_LIST_RUNLOCK(ipi)   rw_runlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WUNLOCK(ipi)   rw_wunlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_LOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
+#define INP_LIST_RLOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
+#define INP_LIST_WLOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
+#define INP_LIST_UNLOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
+
 #define	INP_HASH_LOCK_INIT(ipi, d) \
 	rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
 #define	INP_HASH_LOCK_DESTROY(ipi)	rw_destroy(&(ipi)->ipi_hash_lock)

Modified: stable/10/sys/netinet/tcp_input.c
==============================================================================
--- stable/10/sys/netinet/tcp_input.c	Thu Nov 24 14:44:53 2016	(r309107)
+++ stable/10/sys/netinet/tcp_input.c	Thu Nov 24 14:48:46 2016	(r309108)
@@ -571,7 +571,7 @@ tcp_input(struct mbuf *m, int off0)
 	char *s = NULL;			/* address and port logging */
 	int ti_locked;
 #define	TI_UNLOCKED	1
-#define	TI_WLOCKED	2
+#define	TI_RLOCKED	2
 
 #ifdef TCPDEBUG
 	/*
@@ -757,8 +757,8 @@ tcp_input(struct mbuf *m, int off0)
 	 * connection in TIMEWAIT and SYNs not targeting a listening socket.
 	 */
 	if ((thflags & (TH_FIN | TH_RST)) != 0) {
-		INP_INFO_WLOCK(&V_tcbinfo);
-		ti_locked = TI_WLOCKED;
+		INP_INFO_RLOCK(&V_tcbinfo);
+		ti_locked = TI_RLOCKED;
 	} else
 		ti_locked = TI_UNLOCKED;
 
@@ -780,8 +780,8 @@ tcp_input(struct mbuf *m, int off0)
 
 findpcb:
 #ifdef INVARIANTS
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	}
@@ -941,11 +941,11 @@ findpcb:
 relocked:
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		if (ti_locked == TI_UNLOCKED) {
-			if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
 				INP_WUNLOCK(inp);
-				INP_INFO_WLOCK(&V_tcbinfo);
-				ti_locked = TI_WLOCKED;
+				INP_INFO_RLOCK(&V_tcbinfo);
+				ti_locked = TI_RLOCKED;
 				INP_WLOCK(inp);
 				if (in_pcbrele_wlocked(inp)) {
 					inp = NULL;
@@ -956,9 +956,9 @@ relocked:
 					goto findpcb;
 				}
 			} else
-				ti_locked = TI_WLOCKED;
+				ti_locked = TI_RLOCKED;
 		}
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		if (thflags & TH_SYN)
 			tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -967,7 +967,7 @@ relocked:
 		 */
 		if (tcp_twcheck(inp, &to, th, m, tlen))
 			goto findpcb;
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return;
 	}
 	/*
@@ -998,17 +998,17 @@ relocked:
 	 */
 #ifdef INVARIANTS
 	if ((thflags & (TH_FIN | TH_RST)) != 0)
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 #endif
 	if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
 	      (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
 	       !(tp->t_flags & TF_FASTOPEN)))) {
 		if (ti_locked == TI_UNLOCKED) {
-			if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
 				INP_WUNLOCK(inp);
-				INP_INFO_WLOCK(&V_tcbinfo);
-				ti_locked = TI_WLOCKED;
+				INP_INFO_RLOCK(&V_tcbinfo);
+				ti_locked = TI_RLOCKED;
 				INP_WLOCK(inp);
 				if (in_pcbrele_wlocked(inp)) {
 					inp = NULL;
@@ -1020,9 +1020,9 @@ relocked:
 				}
 				goto relocked;
 			} else
-				ti_locked = TI_WLOCKED;
+				ti_locked = TI_RLOCKED;
 		}
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	}
 
 #ifdef MAC
@@ -1077,7 +1077,7 @@ relocked:
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
 
-			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
@@ -1132,7 +1132,11 @@ new_tfo_socket:
 			 */
 			INP_WUNLOCK(inp);	/* listen socket */
 			inp = sotoinpcb(so);
-			INP_WLOCK(inp);		/* new connection */
+			/*
+			 * New connection inpcb is already locked by
+			 * syncache_expand().
+			 */
+			INP_WLOCK_ASSERT(inp);
 			tp = intotcpcb(inp);
 			KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
 			    ("%s: ", __func__));
@@ -1368,8 +1372,8 @@ new_tfo_socket:
 		 * Entry added to syncache and mbuf consumed.
 		 * Only the listen socket is unlocked by syncache_add().
 		 */
-		if (ti_locked == TI_WLOCKED) {
-			INP_INFO_WUNLOCK(&V_tcbinfo);
+		if (ti_locked == TI_RLOCKED) {
+			INP_INFO_RUNLOCK(&V_tcbinfo);
 			ti_locked = TI_UNLOCKED;
 		}
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -1418,8 +1422,8 @@ new_tfo_socket:
 dropwithreset:
 	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
@@ -1442,8 +1446,8 @@ dropunlock:
 	if (m != NULL)
 		TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
@@ -1501,13 +1505,13 @@ tcp_do_segment(struct mbuf *m, struct tc
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
-		KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 		    "SYN/FIN/RST/!EST", __func__, ti_locked));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
-		if (ti_locked == TI_WLOCKED)
-			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		if (ti_locked == TI_RLOCKED)
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 			    "ti_locked: %d", __func__, ti_locked));
@@ -1676,8 +1680,8 @@ tcp_do_segment(struct mbuf *m, struct tc
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
-				if (ti_locked == TI_WLOCKED)
-					INP_INFO_WUNLOCK(&V_tcbinfo);
+				if (ti_locked == TI_RLOCKED)
+					INP_INFO_RUNLOCK(&V_tcbinfo);
 				ti_locked = TI_UNLOCKED;
 
 				TCPSTAT_INC(tcps_predack);
@@ -1780,8 +1784,8 @@ tcp_do_segment(struct mbuf *m, struct tc
 			 * nothing on the reassembly queue and we have enough
 			 * buffer space to take it.
 			 */
-			if (ti_locked == TI_WLOCKED)
-				INP_INFO_WUNLOCK(&V_tcbinfo);
+			if (ti_locked == TI_RLOCKED)
+				INP_INFO_RUNLOCK(&V_tcbinfo);
 			ti_locked = TI_UNLOCKED;
 
 			/* Clean receiver SACK report if present */
@@ -2039,9 +2043,9 @@ tcp_do_segment(struct mbuf *m, struct tc
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
-		KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+		KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
 		    "ti_locked %d", __func__, ti_locked));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
@@ -2169,10 +2173,10 @@ tcp_do_segment(struct mbuf *m, struct tc
 			case TCPS_CLOSE_WAIT:
 				so->so_error = ECONNRESET;
 			close:
-				KASSERT(ti_locked == TI_WLOCKED,
+				KASSERT(ti_locked == TI_RLOCKED,
 				    ("tcp_do_segment: TH_RST 1 ti_locked %d",
 				    ti_locked));
-				INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 				tcp_state_change(tp, TCPS_CLOSED);
 				TCPSTAT_INC(tcps_drops);
@@ -2181,10 +2185,10 @@ tcp_do_segment(struct mbuf *m, struct tc
 
 			case TCPS_CLOSING:
 			case TCPS_LAST_ACK:
-				KASSERT(ti_locked == TI_WLOCKED,
+				KASSERT(ti_locked == TI_RLOCKED,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201611241448.uAOEmkk9078800>