Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 30 Apr 2025 16:49:20 GMT
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 626ea75ed2e9 - main - time: use precise callout for clock_nanosleep(2) and nanosleep(2)
Message-ID:  <202504301649.53UGnKKT022361@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=626ea75ed2e9e9365ef8d7a4fa8ef219020c98c6

commit 626ea75ed2e9e9365ef8d7a4fa8ef219020c98c6
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2025-04-30 16:47:57 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2025-04-30 16:47:57 +0000

    time: use precise callout for clock_nanosleep(2) and nanosleep(2)
    
    Don't apply tc_precexp and TIMESEL() that uses sbt_timethreshold (both
    derivatives of kern.timecounter.alloweddeviation) to sleep callout when
    processing the default and precise clocks.  The default timer deviation of
    5% is our internal optimization in the kernel, and we shouldn't leak that
    into the POSIX APIs.  Note that an application has no way to cancel the
    deviation; only a superuser can change the global tunable [with side
    effects].
    
    Leave the deviation for CLOCK_*_FAST and CLOCK_SECOND that are documented
    as imprecise.
    
    Provide a sysctl kern.timecounter.nanosleep_precise that allows restoring
    the previous behavior.
    
    Improve documentation.
    
    Reviewed by:            ziaee, vangyzen, imp, kib
    Differential Revision:  https://reviews.freebsd.org/D50075
---
 lib/libsys/nanosleep.2 | 52 +++++++++++++++++++++++++++++++++++++++++---------
 sys/kern/kern_time.c   | 36 +++++++++++++++++++++++++++-------
 2 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/lib/libsys/nanosleep.2 b/lib/libsys/nanosleep.2
index 8a4931e51413..ba9aae1edf57 100644
--- a/lib/libsys/nanosleep.2
+++ b/lib/libsys/nanosleep.2
@@ -27,7 +27,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd April 3, 2022
+.Dd April 29, 2025
 .Dt NANOSLEEP 2
 .Os
 .Sh NAME
@@ -87,14 +87,6 @@ If, at the time of the call, the time value specified by
 is less than or equal to the time value of the specified clock, then
 .Fn clock_nanosleep
 returns immediately and the calling thread is not suspended.
-.Pp
-The suspension time may be longer than requested due to the
-scheduling of other activity by the system.
-It is also subject to the allowed time interval deviation
-specified by the
-.Va kern.timecounter.alloweddeviation
-.Xr sysctl 8
-variable.
 An unmasked signal will terminate the sleep early, regardless of the
 .Dv SA_RESTART
 value on the interrupting signal.
@@ -131,6 +123,32 @@ CLOCK_UPTIME_FAST
 CLOCK_UPTIME_PRECISE
 .El
 .Pp
+The suspension time may be longer than requested due to the
+scheduling of other activity by the system.
+The clocks with the
+.Dv _FAST
+suffix and the
+.Dv CLOCK_SECOND
+are subject to the allowed time interval deviation specified by the
+.Va kern.timecounter.alloweddeviation
+.Xr sysctl 8
+variable.
+The clocks with the
+.Dv _PRECISE
+suffix are always as precise as possible.
+The
+.Dv CLOCK_MONOTONIC ,
+.Dv CLOCK_REALTIME
+and
+.Dv CLOCK_UPTIME
+are precise by default.
+Setting the
+.Va kern.timecounter.nanosleep_precise
+.Xr sysctl 8
+to a false value makes those clocks behave like the
+.Dv _FAST
+clocks.
+.Pp
 The
 .Fn nanosleep
 function behaves like
@@ -217,3 +235,19 @@ and was ported to
 .Ox 2.1
 and
 .Fx 3.0 .
+The
+.Fn clock_nanosleep
+system call has been available since
+.Fx 11.1 .
+.Pp
+In
+.Fx 15.0
+the default behavior of
+.Fn clock_nanosleep
+with
+.Dv CLOCK_MONOTONIC ,
+.Dv CLOCK_REALTIME ,
+.Dv CLOCK_UPTIME
+clocks and
+.Fn nanosleep
+has been switched to use a precise clock.
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index d7dc78366292..0c31c1563d99 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -494,6 +494,10 @@ kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
 	    rmt));
 }
 
+static __read_mostly bool nanosleep_precise = true;
+SYSCTL_BOOL(_kern_timecounter, OID_AUTO, nanosleep_precise, CTLFLAG_RW,
+    &nanosleep_precise, 0, "clock_nanosleep() with CLOCK_REALTIME, "
+    "CLOCK_MONOTONIC, CLOCK_UPTIME and nanosleep(2) use precise clock");
 static uint8_t nanowait[MAXCPU];
 
 int
@@ -504,7 +508,7 @@ kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 	sbintime_t sbt, sbtt, prec, tmp;
 	time_t over;
 	int error;
-	bool is_abs_real;
+	bool is_abs_real, precise;
 
 	if (rqt->tv_nsec < 0 || rqt->tv_nsec >= NS_PER_SEC)
 		return (EINVAL);
@@ -512,17 +516,31 @@ kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 		return (EINVAL);
 	switch (clock_id) {
 	case CLOCK_REALTIME:
+		precise = nanosleep_precise;
+		is_abs_real = (flags & TIMER_ABSTIME) != 0;
+		break;
 	case CLOCK_REALTIME_PRECISE:
+		precise = true;
+		is_abs_real = (flags & TIMER_ABSTIME) != 0;
+		break;
 	case CLOCK_REALTIME_FAST:
 	case CLOCK_SECOND:
+		precise = false;
 		is_abs_real = (flags & TIMER_ABSTIME) != 0;
 		break;
 	case CLOCK_MONOTONIC:
-	case CLOCK_MONOTONIC_PRECISE:
-	case CLOCK_MONOTONIC_FAST:
 	case CLOCK_UPTIME:
+		precise = nanosleep_precise;
+		is_abs_real = false;
+		break;
+	case CLOCK_MONOTONIC_PRECISE:
 	case CLOCK_UPTIME_PRECISE:
+		precise = true;
+		is_abs_real = false;
+		break;
+	case CLOCK_MONOTONIC_FAST:
 	case CLOCK_UPTIME_FAST:
+		precise = false;
 		is_abs_real = false;
 		break;
 	case CLOCK_VIRTUAL:
@@ -553,10 +571,14 @@ kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 		} else
 			over = 0;
 		tmp = tstosbt(ts);
-		prec = tmp;
-		prec >>= tc_precexp;
-		if (TIMESEL(&sbt, tmp))
-			sbt += tc_tick_sbt;
+		if (precise) {
+			prec = 0;
+			sbt = sbinuptime();
+		} else {
+			prec = tmp >> tc_precexp;
+			if (TIMESEL(&sbt, tmp))
+				sbt += tc_tick_sbt;
+		}
 		sbt += tmp;
 		error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
 		    sbt, prec, C_ABSOLUTE);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202504301649.53UGnKKT022361>