Date: Wed, 29 Oct 2025 16:32:57 GMT From: Kyle Evans <kevans@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: d617806aac14 - main - libc: report _SC_NPROCESSORS_ONLN more accurately in cpu-limited jails Message-ID: <202510291632.59TGWvbF044008@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=d617806aac1469319970e3551656e9deabb98a35 commit d617806aac1469319970e3551656e9deabb98a35 Author: Kyle Evans <kevans@FreeBSD.org> AuthorDate: 2025-10-29 16:32:30 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2025-10-29 16:32:30 +0000 libc: report _SC_NPROCESSORS_ONLN more accurately in cpu-limited jails We don't support CPU hotplug, but we do support cpuset(8) restrictions on jails (including prison0, which uses cpuset 1). The process cannot widen its cpuset beyond its root set, so it makes sense to instead report the number of cpus enabled there rather than the total number in the system. This change is effectively a nop for the majority of systems and jails in the wild, though it does reduce the performance of this query now that we can't take advantage of AT_NCPUS being provided in the auxinfo. The implementation here is notably different from Linux, which would not take cgroups into account. Linux does, however, take CPU hotplug into account, so letting the online count diverge from (and be lower than) the configured count, to reflect what the process can actually be scheduled on, is not really a divergence in semantics. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D52295 --- lib/libc/gen/sysconf.3 | 6 +- lib/libc/gen/sysconf.c | 16 ++++- lib/libc/tests/sys/cpuset_test.c | 140 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 156 insertions(+), 6 deletions(-) diff --git a/lib/libc/gen/sysconf.3 b/lib/libc/gen/sysconf.3 index e38357b898a7..290ef0dc158c 100644 --- a/lib/libc/gen/sysconf.3 +++ b/lib/libc/gen/sysconf.3 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 26, 2013 +.Dd August 30, 2025 .Dt SYSCONF 3 .Os .Sh NAME @@ -77,7 +77,9 @@ The maximum number of supplemental groups. .It Li _SC_NPROCESSORS_CONF The number of processors configured. 
.It Li _SC_NPROCESSORS_ONLN -The number of processors currently online. +The number of processors currently online, taking into account current jail +restrictions to report only the number of processors that are usable to the +process. .It Li _SC_OPEN_MAX One more than the maximum value the system may assign to a new file descriptor. .It Li _SC_PAGESIZE diff --git a/lib/libc/gen/sysconf.c b/lib/libc/gen/sysconf.c index b5b732eed05d..87aedc07c110 100644 --- a/lib/libc/gen/sysconf.c +++ b/lib/libc/gen/sysconf.c @@ -72,6 +72,7 @@ long sysconf(int name) { struct rlimit rl; + cpuset_t cpus; size_t len; int mib[2], sverrno, value; long lvalue, defaultresult; @@ -581,8 +582,21 @@ yesno: return (_POSIX_IPV6); #endif - case _SC_NPROCESSORS_CONF: case _SC_NPROCESSORS_ONLN: + /* + * Consult our root set first, because our CPU availability + * may not match the total number of CPUs available on the + * system and we may have a non-uniform layout even within + * userland. In particular, each jail has a root set that can + * be constrained by its parent and processes within the jail + * cannot widen beyond those constraints, so to those processes + * it makes sense to claim the more limited count. 
+ */ + if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, + sizeof(cpus), &cpus) == 0) + return (CPU_COUNT(&cpus)); + /* FALLTHROUGH */ + case _SC_NPROCESSORS_CONF: if (_elf_aux_info(AT_NCPUS, &value, sizeof(value)) == 0) return ((long)value); mib[0] = CTL_HW; diff --git a/lib/libc/tests/sys/cpuset_test.c b/lib/libc/tests/sys/cpuset_test.c index 53d6a8215bbc..c8ad225fadfc 100644 --- a/lib/libc/tests/sys/cpuset_test.c +++ b/lib/libc/tests/sys/cpuset_test.c @@ -34,8 +34,10 @@ #include <sys/uio.h> #include <sys/wait.h> +#include <assert.h> #include <errno.h> #include <stdio.h> +#include <stdlib.h> #include <unistd.h> #include <atf-c.h> @@ -107,6 +109,19 @@ skip_ltncpu(int ncpu, cpuset_t *mask) atf_tc_skip("Test requires %d or more cores.", ncpu); } +static void +skip_ltncpu_root(int ncpu, cpuset_t *mask) +{ + + CPU_ZERO(mask); + ATF_REQUIRE_EQ(0, cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, + -1, sizeof(*mask), mask)); + if (CPU_COUNT(mask) < ncpu) { + atf_tc_skip("Test requires cpuset root with %d or more cores.", + ncpu); + } +} + ATF_TC(newset); ATF_TC_HEAD(newset, tc) { @@ -234,9 +249,8 @@ ATF_TC_BODY(deadlk, tc) } static int -do_jail(int sock) +create_jail(void) { - struct jail_test_info info; struct iovec iov[2]; char *name; int error; @@ -250,8 +264,22 @@ do_jail(int sock) iov[1].iov_base = name; iov[1].iov_len = strlen(name) + 1; - if (jail_set(iov, 2, JAIL_CREATE | JAIL_ATTACH) < 0) + error = jail_set(iov, 2, JAIL_CREATE | JAIL_ATTACH); + free(name); + if (error < 0) return (FAILURE_JAIL); + return (0); +} + +static int +do_jail(int sock) +{ + struct jail_test_info info; + int error; + + error = create_jail(); + if (error != 0) + return (error); /* Record parameters, kick them over, then make a swift exit. 
*/ CPU_ZERO(&info.jail_tidmask); @@ -641,6 +669,111 @@ ATF_TC_BODY(jail_attach_disjoint, tc) try_attach(jid, &smask); } +struct nproc_info { + long nproc_init; + long nproc_final; + long nproc_global; +}; + +ATF_TC(jail_nproc); +ATF_TC_HEAD(jail_nproc, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Test that _SC_PROCESSORS_ONLN reflects jail cpuset constraints"); +} +ATF_TC_BODY(jail_nproc, tc) +{ + cpuset_t jmask; + struct nproc_info ninfo = { }; + int sockpair[2]; + cpusetid_t setid; + ssize_t readsz; + pid_t pid; + int fcpu, error, pfd, sock; + char okb = 0x7f, rcvb; + + skip_ltncpu_root(2, &jmask); + fcpu = CPU_FFS(&jmask) - 1; + + /* + * Just adjusting our affinity should not affect the number of + * processors considered online- we want to be sure that it's only + * adjusted if our jail's root set is. + */ + CPU_CLR(fcpu, &jmask); + error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(jmask), &jmask); + ATF_REQUIRE_EQ(0, error); + ATF_REQUIRE(sysconf(_SC_NPROCESSORS_ONLN) > CPU_COUNT(&jmask)); + + ATF_REQUIRE_EQ(0, socketpair(PF_UNIX, SOCK_STREAM, 0, sockpair)); + + /* We'll wait on the procdesc, too, so we can fail faster if it dies. */ + ATF_REQUIRE((pid = pdfork(&pfd, 0)) != -1); + + if (pid == 0) { + /* First child sets up the jail. */ + sock = sockpair[SP_CHILD]; + close(sockpair[SP_PARENT]); + + error = create_jail(); + if (error != 0) + _exit(error); + + ninfo.nproc_init = sysconf(_SC_NPROCESSORS_ONLN); + + /* Signal the parent that we're jailed. */ + readsz = write(sock, &okb, sizeof(okb)); + assert(readsz == sizeof(okb)); + + /* Wait for parent to adjust our mask and signal OK. 
*/ + readsz = read(sock, &rcvb, sizeof(rcvb)); + assert(readsz == sizeof(rcvb)); + assert(rcvb == okb); + + ninfo.nproc_final = sysconf(_SC_NPROCESSORS_ONLN); + ninfo.nproc_global = sysconf(_SC_NPROCESSORS_CONF); + readsz = write(sock, &ninfo, sizeof(ninfo)); + assert(readsz == sizeof(ninfo)); + + _exit(0); + } + + close(sockpair[SP_CHILD]); + sock = sockpair[SP_PARENT]; + + /* Wait for signal that they are jailed. */ + readsz = read(sock, &rcvb, sizeof(rcvb)); + assert(readsz == sizeof(rcvb)); + assert(rcvb == okb); + + /* Grab the cpuset id and adjust it. */ + error = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, pid, &setid); + ATF_REQUIRE_EQ(0, error); + error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_CPUSET, + setid, sizeof(jmask), &jmask); + ATF_REQUIRE_EQ(0, error); + + /* Signal OK to proceed. */ + readsz = write(sock, &okb, sizeof(okb)); + ATF_REQUIRE_EQ(sizeof(okb), readsz); + + /* Grab our final nproc info. */ + readsz = read(sock, &ninfo, sizeof(ninfo)); + ATF_REQUIRE_EQ(sizeof(ninfo), readsz); + + /* + * We set our own affinity to jmask, which is derived from *our* root + * set, at the beginning of the test. The jail would inherit from this + * set, so we just re-use that mask here to confirm that + * _SC_NPROCESSORS_ONLN did actually drop in response to us limiting the + * jail, and that its _SC_NPROCESSORS_CONF did not. + */ + ATF_REQUIRE_EQ(CPU_COUNT(&jmask) + 1, ninfo.nproc_init); + ATF_REQUIRE_EQ(CPU_COUNT(&jmask) + 1, ninfo.nproc_global); + ATF_REQUIRE_EQ(CPU_COUNT(&jmask), ninfo.nproc_final); +} + ATF_TC(badparent); ATF_TC_HEAD(badparent, tc) { @@ -686,6 +819,7 @@ ATF_TP_ADD_TCS(tp) ATF_TP_ADD_TC(tp, jail_attach_prevbase); ATF_TP_ADD_TC(tp, jail_attach_plain); ATF_TP_ADD_TC(tp, jail_attach_disjoint); + ATF_TP_ADD_TC(tp, jail_nproc); ATF_TP_ADD_TC(tp, badparent); return (atf_no_error()); }
home | help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202510291632.59TGWvbF044008>
