Date: Sun, 23 Feb 2025 16:20:46 GMT
From: Mark Johnston <markj@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: 8b3d2c19d369 - main - inpcb: Fix reuseport lbgroup array resizing
Message-ID: <202502231620.51NGKkl6079968@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=8b3d2c19d3691f29d4e86c73bc11491ae3fbfaec commit 8b3d2c19d3691f29d4e86c73bc11491ae3fbfaec Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2025-02-23 16:20:12 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2025-02-23 16:20:12 +0000 inpcb: Fix reuseport lbgroup array resizing in_pcblisten() moves an inpcb from the per-group list into the array, at which point it becomes visible to inpcb lookups in the datapath. It assumes that there is space in the array for this, but that's not guaranteed, since in_pcbinslbgrouphash() doesn't reserve space in the array if the inpcb isn't associated with a listening socket. We could resize the array in in_pcblisten(), but that would introduce a failure case where there currently is none. Instead, keep track of the number of pending inpcbs as well, and modify in_pcbinslbgrouphash() to reserve space for each pending (i.e., not-yet-listening) inpcb. Add a regression test. Reviewed by: glebius Reported by: netchild Fixes: 7cbb6b6e28db ("inpcb: Close some SO_REUSEPORT_LB races, part 2") Differential Revision: https://reviews.freebsd.org/D49100 --- sys/netinet/in_pcb.c | 7 ++++- sys/netinet/in_pcb_var.h | 1 + tests/sys/netinet/so_reuseport_lb_test.c | 46 ++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 9d174dce9024..1d9cc1866e15 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -339,6 +339,7 @@ in_pcblbgroup_insert(struct inpcblbgroup *grp, struct inpcb *inp) * lookups until listen() has been called. 
*/ LIST_INSERT_HEAD(&grp->il_pending, inp, inp_lbgroup_list); + grp->il_pendcnt++; } else { grp->il_inp[grp->il_inpcnt] = inp; @@ -375,6 +376,8 @@ in_pcblbgroup_resize(struct inpcblbgrouphead *hdr, CK_LIST_INSERT_HEAD(hdr, grp, il_list); LIST_SWAP(&old_grp->il_pending, &grp->il_pending, inpcb, inp_lbgroup_list); + grp->il_pendcnt = old_grp->il_pendcnt; + old_grp->il_pendcnt = 0; in_pcblbgroup_free(old_grp); return (grp); } @@ -435,7 +438,7 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain) return (ENOBUFS); in_pcblbgroup_insert(grp, inp); CK_LIST_INSERT_HEAD(hdr, grp, il_list); - } else if (grp->il_inpcnt == grp->il_inpsiz) { + } else if (grp->il_inpcnt + grp->il_pendcnt == grp->il_inpsiz) { if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) { if (ratecheck(&lastprint, &interval)) printf("lb group port %d, limit reached\n", @@ -499,6 +502,7 @@ in_pcbremlbgrouphash(struct inpcb *inp) LIST_FOREACH(inp1, &grp->il_pending, inp_lbgroup_list) { if (inp == inp1) { LIST_REMOVE(inp, inp_lbgroup_list); + grp->il_pendcnt--; inp->inp_flags &= ~INP_INLBGROUP; return; } @@ -1503,6 +1507,7 @@ in_pcblisten(struct inpcb *inp) INP_HASH_WLOCK(pcbinfo); grp = in_pcblbgroup_find(inp); LIST_REMOVE(inp, inp_lbgroup_list); + grp->il_pendcnt--; in_pcblbgroup_insert(grp, inp); INP_HASH_WUNLOCK(pcbinfo); } diff --git a/sys/netinet/in_pcb_var.h b/sys/netinet/in_pcb_var.h index e2b0ca386e7f..32fdbced175c 100644 --- a/sys/netinet/in_pcb_var.h +++ b/sys/netinet/in_pcb_var.h @@ -82,6 +82,7 @@ struct inpcblbgroup { #define il6_laddr il_dependladdr.id6_addr uint32_t il_inpsiz; /* max count in il_inp[] (h) */ uint32_t il_inpcnt; /* cur count in il_inp[] (h) */ + uint32_t il_pendcnt; /* cur count in il_pending (h) */ struct inpcb *il_inp[]; /* (h) */ }; diff --git a/tests/sys/netinet/so_reuseport_lb_test.c b/tests/sys/netinet/so_reuseport_lb_test.c index 09d8e0ce8f83..aaadaead5e23 100644 --- a/tests/sys/netinet/so_reuseport_lb_test.c +++ b/tests/sys/netinet/so_reuseport_lb_test.c @@ -433,6 
+433,51 @@ ATF_TC_BODY(double_listen_ipv6, tc) ATF_REQUIRE_MSG(error == 0, "close() failed: %s", strerror(errno)); } +/* + * Try binding many sockets to the same lbgroup without calling listen(2) on + * them. + */ +ATF_TC_WITHOUT_HEAD(bind_without_listen); +ATF_TC_BODY(bind_without_listen, tc) +{ + const int nsockets = 100; + struct sockaddr_in sin; + socklen_t socklen; + int error, s, s2[nsockets]; + + s = lb_listen_socket(PF_INET, 0); + + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_port = htons(0); + sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + error = bind(s, (struct sockaddr *)&sin, sizeof(sin)); + ATF_REQUIRE_MSG(error == 0, "bind() failed: %s", strerror(errno)); + + socklen = sizeof(sin); + error = getsockname(s, (struct sockaddr *)&sin, &socklen); + ATF_REQUIRE_MSG(error == 0, "getsockname() failed: %s", + strerror(errno)); + + for (int i = 0; i < nsockets; i++) { + s2[i] = lb_listen_socket(PF_INET, 0); + error = bind(s2[i], (struct sockaddr *)&sin, sizeof(sin)); + ATF_REQUIRE_MSG(error == 0, "bind() failed: %s", strerror(errno)); + } + for (int i = 0; i < nsockets; i++) { + error = listen(s2[i], 1); + ATF_REQUIRE_MSG(error == 0, "listen() failed: %s", strerror(errno)); + } + for (int i = 0; i < nsockets; i++) { + error = close(s2[i]); + ATF_REQUIRE_MSG(error == 0, "close() failed: %s", strerror(errno)); + } + + error = close(s); + ATF_REQUIRE_MSG(error == 0, "close() failed: %s", strerror(errno)); +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, basic_ipv4); @@ -440,6 +485,7 @@ ATF_TP_ADD_TCS(tp) ATF_TP_ADD_TC(tp, concurrent_add); ATF_TP_ADD_TC(tp, double_listen_ipv4); ATF_TP_ADD_TC(tp, double_listen_ipv6); + ATF_TP_ADD_TC(tp, bind_without_listen); return (atf_no_error()); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202502231620.51NGKkl6079968>