From owner-svn-src-all@freebsd.org Wed Jul 27 11:49:43 2016 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 45144BA49B2; Wed, 27 Jul 2016 11:49:43 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 071731B02; Wed, 27 Jul 2016 11:49:42 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u6RBng49089542; Wed, 27 Jul 2016 11:49:42 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u6RBngu3089540; Wed, 27 Jul 2016 11:49:42 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201607271149.u6RBngu3089540@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Wed, 27 Jul 2016 11:49:42 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r303387 - head/sys/kern X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 27 Jul 2016 11:49:43 -0000 Author: kib Date: Wed Jul 27 11:49:41 2016 New Revision: 303387 URL: https://svnweb.freebsd.org/changeset/base/303387 Log: Prevent parallel tc_windup() calls, both parallel top-level calls from setclock() and from simultaneous top-level and interrupt. For this, tc_windup() is protected with a tc_setclock_mtx spinlock, in the try mode when called from hardclock interrupt. If spinlock cannot be obtained without spinning from the interrupt context, this means that top-level executes tc_windup() on other core and our try may be avoided. The boottimebin and boottime variables should be adjusted from tc_windup(). To be correct, they must be part of the timehands and read using lockless protocol. Remove the globals and reimplement the getboottime(9)/getboottimebin(9) KPI using the timehands read protocol. Tested by: pho (as part of the whole patch) Reviewed by: jhb (same) Discussed wit: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month X-Differential revision: https://reviews.freebsd.org/D7302 Modified: head/sys/kern/kern_clock.c head/sys/kern/kern_tc.c Modified: head/sys/kern/kern_clock.c ============================================================================== --- head/sys/kern/kern_clock.c Wed Jul 27 11:40:06 2016 (r303386) +++ head/sys/kern/kern_clock.c Wed Jul 27 11:49:41 2016 (r303387) @@ -381,7 +381,9 @@ volatile int ticks; int psratio; static DPCPU_DEFINE(int, pcputicks); /* Per-CPU version of ticks. */ -static int global_hardclock_run = 0; +#ifdef DEVICE_POLLING +static int devpoll_run = 0; +#endif /* * Initialize clock frequencies and start both clocks running. @@ -584,15 +586,15 @@ hardclock_cnt(int cnt, int usermode) #endif /* We are in charge to handle this tick duty. */ if (newticks > 0) { - /* Dangerous and no need to call these things concurrently. */ - if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) { - tc_ticktock(newticks); + tc_ticktock(newticks); #ifdef DEVICE_POLLING + /* Dangerous and no need to call these things concurrently. */ + if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) { /* This is very short and quick. */ hardclock_device_poll(); -#endif /* DEVICE_POLLING */ - atomic_store_rel_int(&global_hardclock_run, 0); + atomic_store_rel_int(&devpoll_run, 0); } +#endif /* DEVICE_POLLING */ #ifdef SW_WATCHDOG if (watchdog_enabled > 0) { i = atomic_fetchadd_int(&watchdog_ticks, -newticks); Modified: head/sys/kern/kern_tc.c ============================================================================== --- head/sys/kern/kern_tc.c Wed Jul 27 11:40:06 2016 (r303386) +++ head/sys/kern/kern_tc.c Wed Jul 27 11:49:41 2016 (r303387) @@ -70,6 +70,7 @@ struct timehands { struct bintime th_offset; struct timeval th_microtime; struct timespec th_nanotime; + struct bintime th_boottime; /* Fields not to be copied in tc_windup start with th_generation. */ u_int th_generation; struct timehands *th_next; @@ -96,8 +97,6 @@ int tc_min_ticktock_freq = 1; volatile time_t time_second = 1; volatile time_t time_uptime = 1; -static struct bintime boottimebin_x; -static struct timeval boottime_x; static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD, NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime"); @@ -125,7 +124,7 @@ SYSCTL_PROC(_kern_timecounter, OID_AUTO, static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ -static void tc_windup(void); +static void tc_windup(struct bintime *new_boottimebin); static void cpu_tick_calibrate(int); void dtrace_getnanotime(struct timespec *tsp); @@ -228,9 +227,17 @@ fbclock_microuptime(struct timeval *tvp) void fbclock_bintime(struct bintime *bt) { + struct timehands *th; + unsigned int gen; - fbclock_binuptime(bt); - bintime_add(bt, &boottimebin_x); + do { + th = timehands; + gen = atomic_load_acq_int(&th->th_generation); + *bt = th->th_offset; + bintime_addx(bt, th->th_scale * tc_delta(th)); + bintime_add(bt, &th->th_boottime); + atomic_thread_fence_acq(); + } while (gen == 0 || gen != th->th_generation); } void @@ -303,9 +310,9 @@ fbclock_getbintime(struct bintime *bt) th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; + bintime_add(bt, &th->th_boottime); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); - bintime_add(bt, &boottimebin_x); } void @@ -372,9 +379,17 @@ microuptime(struct timeval *tvp) void bintime(struct bintime *bt) { + struct timehands *th; + u_int gen; - binuptime(bt); - bintime_add(bt, &boottimebin_x); + do { + th = timehands; + gen = atomic_load_acq_int(&th->th_generation); + *bt = th->th_offset; + bintime_addx(bt, th->th_scale * tc_delta(th)); + bintime_add(bt, &th->th_boottime); + atomic_thread_fence_acq(); + } while (gen == 0 || gen != th->th_generation); } void @@ -447,9 +462,9 @@ getbintime(struct bintime *bt) th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; + bintime_add(bt, &th->th_boottime); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); - bintime_add(bt, &boottimebin_x); } void @@ -484,15 +499,24 @@ getmicrotime(struct timeval *tvp) void getboottime(struct timeval *boottime) { + struct bintime boottimebin; - *boottime = boottime_x; + getboottimebin(&boottimebin); + bintime2timeval(&boottimebin, boottime); } void getboottimebin(struct bintime *boottimebin) { + struct timehands *th; + u_int gen; - *boottimebin = boottimebin_x; + do { + th = timehands; + gen = atomic_load_acq_int(&th->th_generation); + *boottimebin = th->th_boottime; + atomic_thread_fence_acq(); + } while (gen == 0 || gen != th->th_generation); } #ifdef FFCLOCK @@ -1237,10 +1261,12 @@ tc_getfrequency(void) return (timehands->th_counter->tc_frequency); } +static struct mtx tc_setclock_mtx; +MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN); + /* * Step our concept of UTC. This is done by modifying our estimate of * when we booted. - * XXX: not locked. */ void tc_setclock(struct timespec *ts) @@ -1248,26 +1274,24 @@ tc_setclock(struct timespec *ts) struct timespec tbef, taft; struct bintime bt, bt2; - cpu_tick_calibrate(1); - nanotime(&tbef); timespec2bintime(ts, &bt); + nanotime(&tbef); + mtx_lock_spin(&tc_setclock_mtx); + cpu_tick_calibrate(1); binuptime(&bt2); bintime_sub(&bt, &bt2); - bintime_add(&bt2, &boottimebin_x); - boottimebin_x = bt; - bintime2timeval(&bt, &boottime_x); /* XXX fiddle all the little crinkly bits around the fiords... */ - tc_windup(); - nanotime(&taft); + tc_windup(&bt); + mtx_unlock_spin(&tc_setclock_mtx); if (timestepwarnings) { + nanotime(&taft); log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", (intmax_t)tbef.tv_sec, tbef.tv_nsec, (intmax_t)taft.tv_sec, taft.tv_nsec, (intmax_t)ts->tv_sec, ts->tv_nsec); } - cpu_tick_calibrate(1); } /* @@ -1276,7 +1300,7 @@ tc_setclock(struct timespec *ts) * timecounter and/or do seconds processing in NTP. Slightly magic. */ static void -tc_windup(void) +tc_windup(struct bintime *new_boottimebin) { struct bintime bt; struct timehands *th, *tho; @@ -1300,6 +1324,8 @@ tc_windup(void) th->th_generation = 0; atomic_thread_fence_rel(); bcopy(tho, th, offsetof(struct timehands, th_generation)); + if (new_boottimebin != NULL) + th->th_boottime = *new_boottimebin; /* * Capture a timecounter delta on the current timecounter and if @@ -1349,7 +1375,7 @@ tc_windup(void) * case we missed a leap second. */ bt = th->th_offset; - bintime_add(&bt, &boottimebin_x); + bintime_add(&bt, &th->th_boottime); i = bt.sec - tho->th_microtime.tv_sec; if (i > LARGE_STEP) i = 2; @@ -1357,7 +1383,7 @@ tc_windup(void) t = bt.sec; ntp_update_second(&th->th_adjustment, &bt.sec); if (bt.sec != t) - boottimebin_x.sec += bt.sec - t; + th->th_boottime.sec += bt.sec - t; } /* Update the UTC timestamps used by the get*() functions. */ /* XXX shouldn't do this here. Should force non-`get' versions. */ @@ -1780,7 +1806,7 @@ pps_event(struct pps_state *pps, int eve tcount &= pps->capth->th_counter->tc_counter_mask; bt = pps->capth->th_offset; bintime_addx(&bt, pps->capth->th_scale * tcount); - bintime_add(&bt, &boottimebin_x); + bintime_add(&bt, &pps->capth->th_boottime); bintime2timespec(&bt, &ts); /* If the timecounter was wound up underneath us, bail out. */ @@ -1853,11 +1879,14 @@ tc_ticktock(int cnt) { static int count; - count += cnt; - if (count < tc_tick) - return; - count = 0; - tc_windup(); + if (mtx_trylock_spin(&tc_setclock_mtx)) { + count += cnt; + if (count >= tc_tick) { + count = 0; + tc_windup(NULL); + } + mtx_unlock_spin(&tc_setclock_mtx); + } } static void __inline @@ -1932,7 +1961,9 @@ inittimecounter(void *dummy) /* warm up new timecounter (again) and get rolling. */ (void)timecounter->tc_get_timecount(timecounter); (void)timecounter->tc_get_timecount(timecounter); - tc_windup(); + mtx_lock_spin(&tc_setclock_mtx); + tc_windup(NULL); + mtx_unlock_spin(&tc_setclock_mtx); } SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL); @@ -2106,7 +2137,7 @@ tc_fill_vdso_timehands(struct vdso_timeh vdso_th->th_offset_count = th->th_offset_count; vdso_th->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th->th_offset = th->th_offset; - vdso_th->th_boottime = boottimebin_x; + vdso_th->th_boottime = th->th_boottime; enabled = cpu_fill_vdso_timehands(vdso_th, th->th_counter); if (!vdso_th_enable) enabled = 0; @@ -2127,8 +2158,8 @@ tc_fill_vdso_timehands32(struct vdso_tim vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th32->th_offset.sec = th->th_offset.sec; *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac; - vdso_th32->th_boottime.sec = boottimebin_x.sec; - *(uint64_t *)&vdso_th32->th_boottime.frac[0] = boottimebin_x.frac; + vdso_th32->th_boottime.sec = th->th_boottime.sec; + *(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac; enabled = cpu_fill_vdso_timehands32(vdso_th32, th->th_counter); if (!vdso_th_enable) enabled = 0;