Date: Sat, 2 Jun 2018 13:07:29 +0200 From: =?UTF-8?Q?Jan_Kokem=c3=bcller?= <jan.kokemueller@gmail.com> To: freebsd-hackers@freebsd.org Subject: Fixing CLOCK_MONOTONIC during ACPI suspend Message-ID: <fa2e524c-172b-a3cc-a6d0-6963b20e1452@gmail.com>
next in thread | raw e-mail | index | archive | help
Hi, I'm trying to fix the monotonic clock during suspend. Currently, IPv6 timers don't expire during suspend because time stops. See also this mail [1] by Bruce Evans. I've been running the attached (very rough) patch for some time now. It saves the RTC before suspend and adds the delta after resume to the current time. Some open issues/questions: - kevent/poll semantics would change because those calls implicitly use CLOCK_MONOTONIC. Linux kept CLOCK_MONOTONIC as is and introduced CLOCK_BOOTTIME with 'fixed' semantics. - What should happen to CLOCK_UPTIME? When OpenBSD fixed CLOCK_MONOTONIC, they kept CLOCK_UPTIME at the old behavior. They needed to switch some programs from CLOCK_MONOTONIC to CLOCK_UPTIME because they relied on the old behaviour. What do you think? [1]: https://lists.freebsd.org/pipermail/freebsd-arch/2013-August/014707.html diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index b9de85d236ed..0b0d2418737e 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/clock.h> #include <sys/proc.h> #include <sys/fcntl.h> #include <sys/malloc.h> @@ -161,7 +162,7 @@ static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc, int state); static void acpi_shutdown_final(void *arg, int howto); static void acpi_enable_fixed_events(struct acpi_softc *sc); static BOOLEAN acpi_has_hid(ACPI_HANDLE handle); -static void acpi_resync_clock(struct acpi_softc *sc); +static void acpi_resync_clock(struct acpi_softc *sc, struct timespec *naptime); static int acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate); static int acpi_wake_run_prep(ACPI_HANDLE handle, int sstate); static int acpi_wake_prep_walk(int sstate); @@ -2862,6 +2863,10 @@ enum acpi_sleep_state { ACPI_SS_SLEPT, }; +void tc_reset_offset(void); + +// static struct timespec old_sys_rtc_delta; + /* * Enter the desired system sleep state. * @@ -2934,6 +2939,25 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) acpi_wake_prep_walk(state); slp_state = ACPI_SS_GPE_SET; + + struct timespec old_rtc_ts = { 0 }; + getrtctime(&old_rtc_ts); + + struct timespec old_sys_ts; + nanouptime(&old_sys_ts); + + // struct timespec sys_rtc_delta = old_sys_ts; + // timespecsub(&sys_rtc_delta, &old_rtc_ts); + + // struct timespec sys_rtc_delta_delta = sys_rtc_delta; + // timespecsub(&sys_rtc_delta_delta, &old_sys_rtc_delta); + + // if (sys_rtc_delta_delta.tv_sec <= -2 || sys_rtc_delta_delta.tv_sec >= 2) { + // old_sys_rtc_delta = sys_rtc_delta; + // } else { + // timespecsub(&old_sys_ts, &sys_rtc_delta_delta); + // } + /* * Inform all devices that we are going to sleep. If at least one * device fails, DEVICE_SUSPEND() automatically resumes the tree. @@ -2946,6 +2970,7 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) device_printf(sc->acpi_dev, "device_suspend failed\n"); goto backout; } + slp_state = ACPI_SS_DEV_SUSPEND; status = AcpiEnterSleepStatePrep(state); @@ -2956,15 +2981,50 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) } slp_state = ACPI_SS_SLP_PREP; + // printf("print ACPI 1\n"); + if (sc->acpi_sleep_delay > 0) DELAY(sc->acpi_sleep_delay * 1000000); suspendclock(); + // printf("print ACPI 2, clock suspended\n"); + intr = intr_disable(); + // printf("print ACPI 3\n"); if (state != ACPI_STATE_S1) { + // printf("print ACPI 4\n"); sleep_result = acpi_sleep_machdep(sc, state); + // printf("print ACPI 4 pre warmup, sleepresult %d\n", (int)sleep_result); + + /* + * Warm up timecounter again. + */ + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + + // printf("print ACPI 4 post warmup\n"); + + tc_reset_offset(); + acpi_wakeup_machdep(sc, state, sleep_result, 0); + // printf("print ACPI 5\n"); + // printf("print ACPI 5 pre warmup\n"); + + /* + * Warm up timecounter again. + */ + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + // (void)timecounter->tc_get_timecount(timecounter); + + // printf("print ACPI 5 post warmup\n"); + /* * XXX According to ACPI specification SCI_EN bit should be restored * by ACPI platform (BIOS, firmware) to its pre-sleep state. @@ -2979,6 +3039,8 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) AcpiLeaveSleepStatePrep(state); + // printf("print ACPI 6\n"); + if (sleep_result == 1 && state == ACPI_STATE_S3) { /* * Prevent mis-interpretation of the wakeup by power button @@ -3002,17 +3064,25 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) } } + // printf("print ACPI 7\n"); + intr_restore(intr); + // printf("print ACPI 8\n"); + /* call acpi_wakeup_machdep() again with interrupt enabled */ acpi_wakeup_machdep(sc, state, sleep_result, 1); + // printf("print ACPI 9\n"); + if (sleep_result == -1) goto backout; /* Re-enable ACPI hardware on wakeup from sleep state 4. */ if (state == ACPI_STATE_S4) AcpiEnable(); + + // printf("print ACPI 10\n"); } else { status = AcpiEnterSleepState(state); AcpiLeaveSleepStatePrep(state); @@ -3032,21 +3102,65 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) backout: if (slp_state >= ACPI_SS_SLP_PREP) resumeclock(); + // printf("print ACPI 11, clock resumed\n"); if (slp_state >= ACPI_SS_GPE_SET) { acpi_wake_prep_walk(state); sc->acpi_sstate = ACPI_STATE_S0; } + // printf("print ACPI 12\n"); + + // printf("print ACPI 12 pre warmup\n"); + + /* + * Warm up timecounter again. + */ + (void)timecounter->tc_get_timecount(timecounter); + (void)timecounter->tc_get_timecount(timecounter); + + // printf("print ACPI 12 post warmup\n"); + if (slp_state >= ACPI_SS_DEV_SUSPEND) DEVICE_RESUME(root_bus); + // printf("print ACPI 13\n"); if (slp_state >= ACPI_SS_SLP_PREP) AcpiLeaveSleepState(state); + // printf("print ACPI 14\n"); if (slp_state >= ACPI_SS_SLEPT) { #if defined(__i386__) || defined(__amd64__) /* NB: we are still using ACPI timecounter at this point. */ resume_TSC(); #endif - acpi_resync_clock(sc); + + struct timespec new_sys_ts; + nanouptime(&new_sys_ts); + + struct timespec new_rtc_ts = { 0 }; + getrtctime(&new_rtc_ts); + + struct timespec naptime = new_rtc_ts; + timespecsub(&naptime, &old_rtc_ts); + + struct timespec run_between_time = new_sys_ts; + timespecsub(&run_between_time, &old_sys_ts); + + timespecsub(&naptime, &run_between_time); + + // dummy advance + // naptime.tv_sec += 29000; + + acpi_resync_clock(sc, + (naptime.tv_sec >= 0 && old_rtc_ts.tv_sec != 0 && + new_rtc_ts.tv_sec != 0) ? + &naptime : + NULL); + acpi_enable_fixed_events(sc); + + // printf("oldrtc: %d: newrtc %d naptime: %d rbt: %d\n", + // (int)old_rtc_ts.tv_sec, + // (int)new_rtc_ts.tv_sec, + // (int)naptime.tv_sec, + // (int)run_between_time.tv_sec); } sc->acpi_next_sstate = 0; @@ -3064,9 +3178,8 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) } #endif - resume_all_proc(); - EVENTHANDLER_INVOKE(power_resume); + resume_all_proc(); /* Allow another sleep request after a while. */ callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME); @@ -3079,15 +3192,10 @@ acpi_EnterSleepState(struct acpi_softc *sc, int state) } static void -acpi_resync_clock(struct acpi_softc *sc) +acpi_resync_clock(struct acpi_softc *sc, struct timespec *naptime) { - - /* - * Warm up timecounter again and reset system clock. - */ - (void)timecounter->tc_get_timecount(timecounter); - (void)timecounter->tc_get_timecount(timecounter); - inittodr(time_second + sc->acpi_sleep_delay); + // inittodr(time_second + sc->acpi_sleep_delay); + tc_addnaptime(naptime); } /* Enable or disable the device's wake GPE. */ diff --git a/sys/dev/acpica/acpi_resource.c b/sys/dev/acpica/acpi_resource.c index 21543141b90c..660352188514 100644 --- a/sys/dev/acpica/acpi_resource.c +++ b/sys/dev/acpica/acpi_resource.c @@ -344,23 +344,29 @@ acpi_parse_resource(ACPI_RESOURCE *res, void *context) break; if (res->Type == ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 && res->Data.Address.ProducerConsumer != ACPI_CONSUMER) { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "ignored %s %s producer\n", name, acpi_address_range_name(res->Data.Address.ResourceType))); +#endif break; } if (res->Data.Address.ResourceType != ACPI_MEMORY_RANGE && res->Data.Address.ResourceType != ACPI_IO_RANGE) { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "ignored %s for non-memory, non-I/O\n", name)); +#endif break; } #ifdef __i386__ if (min > ULONG_MAX || (res->Data.Address.MaxAddressFixed && max > ULONG_MAX)) { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "ignored %s above 4G\n", name)); +#endif break; } if (max > ULONG_MAX) @@ -369,23 +375,31 @@ acpi_parse_resource(ACPI_RESOURCE *res, void *context) if (res->Data.Address.MinAddressFixed == ACPI_ADDRESS_FIXED && res->Data.Address.MaxAddressFixed == ACPI_ADDRESS_FIXED) { if (res->Data.Address.ResourceType == ACPI_MEMORY_RANGE) { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "%s/Memory 0x%jx/%ju\n", name, (uintmax_t)min, (uintmax_t)length)); +#endif set->set_memory(dev, arc->context, min, length); } else { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "%s/IO 0x%jx/%ju\n", name, (uintmax_t)min, (uintmax_t)length)); +#endif set->set_ioport(dev, arc->context, min, length); } } else { if (res->Data.Address32.ResourceType == ACPI_MEMORY_RANGE) { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "%s/Memory 0x%jx-0x%jx/%ju\n", name, (uintmax_t)min, (uintmax_t)max, (uintmax_t)length)); +#endif set->set_memoryrange(dev, arc->context, min, max, length, gran); } else { +#ifdef ACPI_DEBUG ACPI_DEBUG_PRINT((ACPI_DB_RESOURCES, "%s/IO 0x%jx-0x%jx/%ju\n", name, (uintmax_t)min, (uintmax_t)max, (uintmax_t)length)); +#endif set->set_iorange(dev, arc->context, min, max, length, gran); } } diff --git a/sys/dev/acpica/acpi_timer.c b/sys/dev/acpica/acpi_timer.c index 34a832089a23..44195e9b494a 100644 --- a/sys/dev/acpica/acpi_timer.c +++ b/sys/dev/acpica/acpi_timer.c @@ -107,11 +107,22 @@ static struct timecounter acpi_timer_timecounter = { -1 /* quality (chosen later) */ }; +static uint32_t old_timer_val; + static __inline uint32_t acpi_timer_read(void) { + uint32_t ret = bus_space_read_4(acpi_timer_bst, acpi_timer_bsh, 0); + + if (ret == (uint32_t)-1) { + // printf("return old acpi timer val instead of %lld\n", + // (long long)ret); + return (old_timer_val); + } else { + old_timer_val = ret; + } - return (bus_space_read_4(acpi_timer_bst, acpi_timer_bsh, 0)); + return (ret); } /* diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index c4e94762fe39..1b87bf24d1c2 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/pcpu.h> #include <sys/proc.h> #include <sys/sbuf.h> #include <sys/sleepqueue.h> @@ -133,7 +134,8 @@ volatile int rtc_generation = 1; static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ -static void tc_windup(struct bintime *new_boottimebin); +static void tc_windup( + struct bintime *new_boottimebin, struct bintime *additional_naptime); static void cpu_tick_calibrate(int); void dtrace_getnanotime(struct timespec *tsp); @@ -1299,6 +1301,8 @@ tc_setclock(struct timespec *ts) struct timespec tbef, taft; struct bintime bt, bt2; + // printf("tc_setclock\n"); + timespec2bintime(ts, &bt); nanotime(&tbef); mtx_lock_spin(&tc_setclock_mtx); @@ -1307,13 +1311,13 @@ tc_setclock(struct timespec *ts) bintime_sub(&bt, &bt2); /* XXX fiddle all the little crinkly bits around the fiords... */ - tc_windup(&bt); + tc_windup(&bt, NULL); mtx_unlock_spin(&tc_setclock_mtx); /* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */ atomic_add_rel_int(&rtc_generation, 2); sleepq_chains_remove_matching(sleeping_on_old_rtc); - if (timestepwarnings) { + if (timestepwarnings || true) { nanotime(&taft); log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", @@ -1323,13 +1327,45 @@ tc_setclock(struct timespec *ts) } } +void tc_reset_offset(void); + +void +tc_reset_offset() +{ + // printf("tc_reset_offset\n"); + + mtx_lock_spin(&tc_setclock_mtx); + cpu_tick_calibrate(1); + timehands->th_offset_count = timecounter->tc_get_timecount(timecounter); + timehands->th_offset_count &= timecounter->tc_counter_mask; + mtx_unlock_spin(&tc_setclock_mtx); +} + +void +tc_addnaptime(struct timespec *ts) +{ + struct bintime bt; + + // printf("tc_addnaptime\n"); + + if (ts) { + timespec2bintime(ts, &bt); + } + mtx_lock_spin(&tc_setclock_mtx); + + /* XXX fiddle all the little crinkly bits around the fiords... */ + tc_windup(NULL, ts ? &bt : NULL); + mtx_unlock_spin(&tc_setclock_mtx); +} + + /* * Initialize the next struct timehands in the ring and make * it the active timehands. Along the way we might switch to a different * timecounter and/or do seconds processing in NTP. Slightly magic. */ static void -tc_windup(struct bintime *new_boottimebin) +tc_windup(struct bintime *new_boottimebin, struct bintime *additional_naptime) { struct bintime bt; struct timehands *th, *tho; @@ -1338,6 +1374,11 @@ tc_windup(struct bintime *new_boottimebin) int i; time_t t; + // if (new_boottimebin || additional_naptime) { + // printf( + // "tc_windup %p %p\n", new_boottimebin, additional_naptime); + // } + /* * Make the next timehands a copy of the current one, but do * not overwrite the generation or next pointer. While we @@ -1356,6 +1397,10 @@ tc_windup(struct bintime *new_boottimebin) if (new_boottimebin != NULL) th->th_boottime = *new_boottimebin; + if (additional_naptime != NULL) { + bintime_add(&th->th_offset, additional_naptime); + } + /* * Capture a timecounter delta on the current timecounter and if * changing timecounters, a counter value from the new timecounter. @@ -1369,8 +1414,17 @@ tc_windup(struct bintime *new_boottimebin) #ifdef FFCLOCK ffclock_windup(delta); #endif + // long long oc_1 = th->th_offset_count; + th->th_offset_count += delta; th->th_offset_count &= th->th_counter->tc_counter_mask; + + // long long oc_2 = th->th_offset_count; + + // long long secs1 = th->th_offset.sec; + // long long p_delta = delta; + // long long p_freq = th->th_counter->tc_frequency; + while (delta > th->th_counter->tc_frequency) { /* Eat complete unadjusted seconds. */ delta -= th->th_counter->tc_frequency; @@ -1383,6 +1437,18 @@ tc_windup(struct bintime *new_boottimebin) } bintime_addx(&th->th_offset, th->th_scale * delta); + // long long secs2 = th->th_offset.sec; + + // if (secs2 != secs1) { + // printf( + // "%p tc_windup ++ %lld (delta: %lld, freq: %lld, gen: %lld, ogen: %lld, oc1: %lld, oc2: %lld)\n", + // (void *)curthread, secs2 - secs1, p_delta, p_freq, + // (long long)th->th_generation, + // (long long)tho->th_generation, // + // (long long)oc_1, + // (long long)oc_2); + // } + /* * Hardware latching timecounters may not generate interrupts on * PPS events, so instead we poll them. There is a finite risk that @@ -1411,8 +1477,10 @@ tc_windup(struct bintime *new_boottimebin) for (; i > 0; i--) { t = bt.sec; ntp_update_second(&th->th_adjustment, &bt.sec); - if (bt.sec != t) + if (bt.sec != t) { th->th_boottime.sec += bt.sec - t; + // printf("tc_windup ntp update %d\n", (int)(bt.sec - t)); + } } /* Update the UTC timestamps used by the get*() functions. */ th->th_bintime = bt; @@ -1911,7 +1979,7 @@ tc_ticktock(int cnt) count += cnt; if (count >= tc_tick) { count = 0; - tc_windup(NULL); + tc_windup(NULL, NULL); } mtx_unlock_spin(&tc_setclock_mtx); } @@ -1990,7 +2058,7 @@ inittimecounter(void *dummy) (void)timecounter->tc_get_timecount(timecounter); (void)timecounter->tc_get_timecount(timecounter); mtx_lock_spin(&tc_setclock_mtx); - tc_windup(NULL); + tc_windup(NULL, NULL); mtx_unlock_spin(&tc_setclock_mtx); } diff --git a/sys/kern/subr_rtc.c b/sys/kern/subr_rtc.c index 66cde8fb2e07..917fa6f5f517 100644 --- a/sys/kern/subr_rtc.c +++ b/sys/kern/subr_rtc.c @@ -422,3 +422,25 @@ sysctl_clock_do_io(SYSCTL_HANDLER_ARGS) return (0); } + +int +getrtctime(struct timespec *ts) +{ + int error; + struct rtc_instance *rtc; + + error = ENODEV; + sx_xlock(&rtc_list_lock); + LIST_FOREACH(rtc, &rtc_list, rtc_entries) + { + if ((error = CLOCK_GETTIME(rtc->clockdev, ts)) != 0) + continue; + if (ts->tv_sec < 0 || ts->tv_nsec < 0) { + error = EINVAL; + continue; + } + break; + } + sx_xunlock(&rtc_list_lock); + return error; +} diff --git a/sys/sys/time.h b/sys/sys/time.h index f278e6315d68..86e9f5d5b1f7 100644 --- a/sys/sys/time.h +++ b/sys/sys/time.h @@ -416,6 +416,7 @@ struct clockinfo { */ void inittodr(time_t base); void resettodr(void); +int getrtctime(struct timespec *ts); extern volatile time_t time_second; extern volatile time_t time_uptime; diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h index 55f61af4c46c..b90aeeaf74c0 100644 --- a/sys/sys/timetc.h +++ b/sys/sys/timetc.h @@ -89,6 +89,7 @@ extern int tc_min_ticktock_freq; /* u_int64_t tc_getfrequency(void); void tc_init(struct timecounter *tc); void tc_setclock(struct timespec *ts); +void tc_addnaptime(struct timespec *ts); void tc_ticktock(int cnt); void cpu_tick_calibration(void); diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c index ef910cca6059..c13df8d2ad0c 100644 --- a/sys/x86/acpica/acpi_wakeup.c +++ b/sys/x86/acpica/acpi_wakeup.c @@ -316,31 +316,44 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, if (!intr_enabled) { /* Wakeup MD procedures in interrupt disabled context */ + // printf("acpi_wakeup_machdep 1\n"); + if (sleep_result == 1) { + // printf("acpi_wakeup_machdep 2\n"); pmap_init_pat(); + // printf("acpi_wakeup_machdep 3\n"); initializecpu(); + // printf("acpi_wakeup_machdep 4\n"); PCPU_SET(switchtime, 0); + // printf("acpi_wakeup_machdep 5\n"); PCPU_SET(switchticks, ticks); + // printf("acpi_wakeup_machdep 6\n"); #ifdef DEV_APIC lapic_xapic_mode(); #endif + // printf("acpi_wakeup_machdep 7\n"); #ifdef SMP if (!CPU_EMPTY(&suspcpus)) acpi_wakeup_cpus(sc); #endif + // printf("acpi_wakeup_machdep 8\n"); } + // printf("acpi_wakeup_machdep 9\n"); #ifdef SMP if (!CPU_EMPTY(&suspcpus)) resume_cpus(suspcpus); #endif + // printf("acpi_wakeup_machdep 10\n"); mca_resume(); #ifdef __amd64__ if (vmm_resume_p != NULL) vmm_resume_p(); #endif + // printf("acpi_wakeup_machdep 11\n"); intr_resume(/*suspend_cancelled*/false); + // printf("acpi_wakeup_machdep 12\n"); AcpiSetFirmwareWakingVector(0, 0); } else { /* Wakeup MD procedures in interrupt enabled context */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?fa2e524c-172b-a3cc-a6d0-6963b20e1452>