Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 15 Sep 2019 08:16:29 +0000 (UTC)
From:      "Bjoern A. Zeeb" <bz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r352352 - stable/12/sys/netinet
Message-ID:  <201909150816.x8F8GTe3006853@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bz
Date: Sun Sep 15 08:16:28 2019
New Revision: 352352
URL: https://svnweb.freebsd.org/changeset/base/352352

Log:
  MFC 346554,346556,346595,348060,348061,348494 udp locking fixes
  
    Fix multiple possible locking problems found by syzkaller and
    update comment (which was wrong already anyway due to previous
    changes).
    Improve KASSERTs for debugging lock related issues.
    Fold two RSS sections together.

Modified:
  stable/12/sys/netinet/udp_usrreq.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/netinet/udp_usrreq.c
==============================================================================
--- stable/12/sys/netinet/udp_usrreq.c	Sun Sep 15 04:14:31 2019	(r352351)
+++ stable/12/sys/netinet/udp_usrreq.c	Sun Sep 15 08:16:28 2019	(r352352)
@@ -1156,9 +1156,23 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct s
 
 	src.sin_family = 0;
 	sin = (struct sockaddr_in *)addr;
+retry:
 	if (sin == NULL ||
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 		INP_WLOCK(inp);
+		/*
+		 * In case we lost a race and another thread bound addr/port
+		 * on the inp we cannot keep the wlock (which still would be
+		 * fine) as further down, based on these values we make
+		 * decisions for the pcbinfo lock.  If the locks are not in
+		 * synch the assertions on unlock will fire, hence we go for
+		 * one retry loop.
+		 */
+		if (sin != NULL && (inp->inp_laddr.s_addr != INADDR_ANY ||
+		    inp->inp_lport != 0)) {
+			INP_WUNLOCK(inp);
+			goto retry;
+		}
 		unlock_inp = UH_WLOCKED;
 	} else {
 		INP_RLOCK(inp);
@@ -1258,36 +1272,44 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct s
 	}
 
 	/*
-	 * Depending on whether or not the application has bound or connected
-	 * the socket, we may have to do varying levels of work.  The optimal
-	 * case is for a connected UDP socket, as a global lock isn't
-	 * required at all.
-	 *
-	 * In order to decide which we need, we require stability of the
-	 * inpcb binding, which we ensure by acquiring a read lock on the
-	 * inpcb.  This doesn't strictly follow the lock order, so we play
-	 * the trylock and retry game; note that we may end up with more
-	 * conservative locks than required the second time around, so later
-	 * assertions have to accept that.  Further analysis of the number of
-	 * misses under contention is required.
-	 *
-	 * XXXRW: Check that hash locking update here is correct.
+	 * In the old days, depending on whether or not the application had
+	 * bound or connected the socket, we had to do varying levels of work.
+	 * The optimal case was for a connected UDP socket, as a global lock
+	 * wasn't required at all.
+	 * In order to decide which we need, we required stability of the
+	 * inpcb binding, which we ensured by acquiring a read lock on the
+	 * inpcb.  This didn't strictly follow the lock order, so we played
+	 * the trylock and retry game.
+	 * With the re-introduction of the route-cache in some cases, we started
+	 * to acquire an early inp wlock and a possible race during re-lock
+	 * went away.  With the introduction of epoch(9) some read locking
+	 * became epoch(9) and the lock-order issues also went away.
+	 * Due to route-cache we may now hold more conservative locks than
+	 * otherwise required and have split up the 2nd case in case 2 and 3
+	 * in order to keep the udpinfo lock level in sync with the inp one
+	 * for the IP_SENDSRCADDR case below.
 	 */
 	pr = inp->inp_socket->so_proto->pr_protocol;
 	pcbinfo = udp_get_inpcbinfo(pr);
-	sin = (struct sockaddr_in *)addr;
 	if (sin != NULL &&
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 		INP_HASH_WLOCK(pcbinfo);
 		unlock_udbinfo = UH_WLOCKED;
-	} else if ((sin != NULL && (
-	    (sin->sin_addr.s_addr == INADDR_ANY) ||
-	    (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
-	    (inp->inp_laddr.s_addr == INADDR_ANY) ||
-	    (inp->inp_lport == 0))) ||
-	    (src.sin_family == AF_INET)) {
+	} else if (sin != NULL &&
+	    (sin->sin_addr.s_addr == INADDR_ANY ||
+	    sin->sin_addr.s_addr == INADDR_BROADCAST ||
+	    inp->inp_laddr.s_addr == INADDR_ANY ||
+	    inp->inp_lport == 0)) {
 		INP_HASH_RLOCK_ET(pcbinfo, et);
 		unlock_udbinfo = UH_RLOCKED;
+	} else if (src.sin_family == AF_INET) {
+		if (unlock_inp == UH_WLOCKED) {
+			INP_HASH_WLOCK(pcbinfo);
+			unlock_udbinfo = UH_WLOCKED;
+		} else {
+			INP_HASH_RLOCK_ET(pcbinfo, et);
+			unlock_udbinfo = UH_RLOCKED;
+		}
 	} else
 		unlock_udbinfo = UH_UNLOCKED;
 
@@ -1497,8 +1519,9 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct s
 	if (flowtype != M_HASHTYPE_NONE) {
 		m->m_pkthdr.flowid = flowid;
 		M_HASHTYPE_SET(m, flowtype);
+	}
 #ifdef	RSS
-	} else {
+	else {
 		uint32_t hash_val, hash_type;
 		/*
 		 * Calculate an appropriate RSS hash for UDP and
@@ -1521,10 +1544,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct s
 			m->m_pkthdr.flowid = hash_val;
 			M_HASHTYPE_SET(m, hash_type);
 		}
-#endif
 	}
 
-#ifdef	RSS
 	/*
 	 * Don't override with the inp cached flowid value.
 	 *
@@ -1559,12 +1580,22 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct s
 release:
 	if (unlock_udbinfo == UH_WLOCKED) {
 		KASSERT(unlock_inp == UH_WLOCKED,
-		    ("%s: excl udbinfo lock, shared inp lock", __func__));
+		    ("%s: excl udbinfo lock %#03x, shared inp lock %#03x, "
+		    "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
+		    "src fam %#04x",
+		    __func__, unlock_udbinfo, unlock_inp, sin,
+		    (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
+		    inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
 		INP_HASH_WUNLOCK(pcbinfo);
 		INP_WUNLOCK(inp);
 	} else if (unlock_udbinfo == UH_RLOCKED) {
 		KASSERT(unlock_inp == UH_RLOCKED,
-		    ("%s: shared udbinfo lock, excl inp lock", __func__));
+		    ("%s: shared udbinfo lock %#03x, excl inp lock %#03x, "
+		    "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
+		    "src fam %#04x",
+		    __func__, unlock_udbinfo, unlock_inp, sin,
+		    (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
+		    inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
 		INP_HASH_RUNLOCK_ET(pcbinfo, et);
 		INP_RUNLOCK(inp);
 	} else if (unlock_inp == UH_WLOCKED)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201909150816.x8F8GTe3006853>