Skip site navigation (1) | Skip section navigation (2)
Date:      Sun, 23 Feb 2025 16:20:46 GMT
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 8b3d2c19d369 - main - inpcb: Fix reuseport lbgroup array resizing
Message-ID:  <202502231620.51NGKkl6079968@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=8b3d2c19d3691f29d4e86c73bc11491ae3fbfaec

commit 8b3d2c19d3691f29d4e86c73bc11491ae3fbfaec
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-02-23 16:20:12 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-02-23 16:20:12 +0000

    inpcb: Fix reuseport lbgroup array resizing
    
    in_pcblisten() moves an inpcb from the per-group list into the array, at
    which point it becomes visible to inpcb lookups in the datapath.  It
    assumes that there is space in the array for this, but that's not
    guaranteed, since in_pcbinslbgrouphash() doesn't reserve space in the
    array if the inpcb isn't associated with a listening socket.
    
    We could resize the array in in_pcblisten(), but that would introduce a
    failure case where there currently is none.  Instead, keep track of the
    number of pending inpcbs as well, and modify in_pcbinslbgrouphash() to
    reserve space for each pending (i.e., not-yet-listening) inpcb.
    
    Add a regression test.
    
    Reviewed by:    glebius
    Reported by:    netchild
    Fixes:          7cbb6b6e28db ("inpcb: Close some SO_REUSEPORT_LB races, part 2")
    Differential Revision:  https://reviews.freebsd.org/D49100
---
 sys/netinet/in_pcb.c                     |  7 ++++-
 sys/netinet/in_pcb_var.h                 |  1 +
 tests/sys/netinet/so_reuseport_lb_test.c | 46 ++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 9d174dce9024..1d9cc1866e15 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -339,6 +339,7 @@ in_pcblbgroup_insert(struct inpcblbgroup *grp, struct inpcb *inp)
 		 * lookups until listen() has been called.
 		 */
 		LIST_INSERT_HEAD(&grp->il_pending, inp, inp_lbgroup_list);
+		grp->il_pendcnt++;
 	} else {
 		grp->il_inp[grp->il_inpcnt] = inp;
 
@@ -375,6 +376,8 @@ in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
 	CK_LIST_INSERT_HEAD(hdr, grp, il_list);
 	LIST_SWAP(&old_grp->il_pending, &grp->il_pending, inpcb,
 	    inp_lbgroup_list);
+	grp->il_pendcnt = old_grp->il_pendcnt;
+	old_grp->il_pendcnt = 0;
 	in_pcblbgroup_free(old_grp);
 	return (grp);
 }
@@ -435,7 +438,7 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain)
 			return (ENOBUFS);
 		in_pcblbgroup_insert(grp, inp);
 		CK_LIST_INSERT_HEAD(hdr, grp, il_list);
-	} else if (grp->il_inpcnt == grp->il_inpsiz) {
+	} else if (grp->il_inpcnt + grp->il_pendcnt == grp->il_inpsiz) {
 		if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
 			if (ratecheck(&lastprint, &interval))
 				printf("lb group port %d, limit reached\n",
@@ -499,6 +502,7 @@ in_pcbremlbgrouphash(struct inpcb *inp)
 		LIST_FOREACH(inp1, &grp->il_pending, inp_lbgroup_list) {
 			if (inp == inp1) {
 				LIST_REMOVE(inp, inp_lbgroup_list);
+				grp->il_pendcnt--;
 				inp->inp_flags &= ~INP_INLBGROUP;
 				return;
 			}
@@ -1503,6 +1507,7 @@ in_pcblisten(struct inpcb *inp)
 		INP_HASH_WLOCK(pcbinfo);
 		grp = in_pcblbgroup_find(inp);
 		LIST_REMOVE(inp, inp_lbgroup_list);
+		grp->il_pendcnt--;
 		in_pcblbgroup_insert(grp, inp);
 		INP_HASH_WUNLOCK(pcbinfo);
 	}
diff --git a/sys/netinet/in_pcb_var.h b/sys/netinet/in_pcb_var.h
index e2b0ca386e7f..32fdbced175c 100644
--- a/sys/netinet/in_pcb_var.h
+++ b/sys/netinet/in_pcb_var.h
@@ -82,6 +82,7 @@ struct inpcblbgroup {
 #define	il6_laddr	il_dependladdr.id6_addr
 	uint32_t	il_inpsiz; /* max count in il_inp[] (h) */
 	uint32_t	il_inpcnt; /* cur count in il_inp[] (h) */
+	uint32_t	il_pendcnt; /* cur count in il_pending (h) */
 	struct inpcb	*il_inp[];			/* (h) */
 };
 
diff --git a/tests/sys/netinet/so_reuseport_lb_test.c b/tests/sys/netinet/so_reuseport_lb_test.c
index 09d8e0ce8f83..aaadaead5e23 100644
--- a/tests/sys/netinet/so_reuseport_lb_test.c
+++ b/tests/sys/netinet/so_reuseport_lb_test.c
@@ -433,6 +433,51 @@ ATF_TC_BODY(double_listen_ipv6, tc)
 	ATF_REQUIRE_MSG(error == 0, "close() failed: %s", strerror(errno));
 }
 
+/*
+ * Bind many sockets to the same lbgroup before calling listen(2) on any of
+ * them, then call listen(2) on each one and close them all.
+ */
+ATF_TC_WITHOUT_HEAD(bind_without_listen);
+ATF_TC_BODY(bind_without_listen, tc)
+{
+	const int nsockets = 100;
+	struct sockaddr_in sin;
+	socklen_t socklen;
+	int error, s, s2[nsockets];
+
+	s = lb_listen_socket(PF_INET, 0);
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_len = sizeof(sin);
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(0);
+	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	error = bind(s, (struct sockaddr *)&sin, sizeof(sin));
+	ATF_REQUIRE_MSG(error == 0, "bind() failed: %s", strerror(errno));
+
+	socklen = sizeof(sin);
+	error = getsockname(s, (struct sockaddr *)&sin, &socklen);
+	ATF_REQUIRE_MSG(error == 0, "getsockname() failed: %s",
+	    strerror(errno));
+
+	for (int i = 0; i < nsockets; i++) {
+		s2[i] = lb_listen_socket(PF_INET, 0);
+		error = bind(s2[i], (struct sockaddr *)&sin, sizeof(sin));
+		ATF_REQUIRE_MSG(error == 0, "bind() failed: %s", strerror(errno));
+	}
+	for (int i = 0; i < nsockets; i++) {
+		error = listen(s2[i], 1);
+		ATF_REQUIRE_MSG(error == 0, "listen() failed: %s", strerror(errno));
+	}
+	for (int i = 0; i < nsockets; i++) {
+		error = close(s2[i]);
+		ATF_REQUIRE_MSG(error == 0, "close() failed: %s", strerror(errno));
+	}
+
+	error = close(s);
+	ATF_REQUIRE_MSG(error == 0, "close() failed: %s", strerror(errno));
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 	ATF_TP_ADD_TC(tp, basic_ipv4);
@@ -440,6 +485,7 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, concurrent_add);
 	ATF_TP_ADD_TC(tp, double_listen_ipv4);
 	ATF_TP_ADD_TC(tp, double_listen_ipv6);
+	ATF_TP_ADD_TC(tp, bind_without_listen);
 
 	return (atf_no_error());
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202502231620.51NGKkl6079968>