X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5d5c5d0d5b79ade9a973d55186ffda2638ba2b6e..d26ffc64f583ab2d29df48f13518685602bc8832:/osfmk/kern/clock.c diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c index a3266197e..9bd9f3b0e 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -1,101 +1,323 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the - * License may not be used to create, or enable the creation or - * redistribution of, unlawful or unlicensed copies of an Apple operating - * system, or to circumvent, violate, or enable the circumvention or - * violation of, any terms of an Apple operating system software license - * agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and * limitations under the License. - * - * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ /* */ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)time.h 8.5 (Berkeley) 5/4/95 + * $FreeBSD$ + */ #include -#include #include #include #include #include #include +#include +#include #include #include +#include +#include #include #include -decl_simple_lock_data(static,clock_lock) +#include +#include +#include +#include + +uint32_t hz_tick_interval = 1; +static uint64_t has_monotonic_clock = 0; + +decl_simple_lock_data(,clock_lock) +lck_grp_attr_t * settime_lock_grp_attr; +lck_grp_t * settime_lock_grp; +lck_attr_t * settime_lock_attr; +lck_mtx_t settime_lock; + +#define clock_lock() \ + simple_lock(&clock_lock) + +#define clock_unlock() \ + simple_unlock(&clock_lock) + +#define clock_lock_init() \ + simple_lock_init(&clock_lock, 0) + +#ifdef kdp_simple_lock_is_acquired +boolean_t kdp_clock_is_locked() +{ + return kdp_simple_lock_is_acquired(&clock_lock); +} +#endif + +struct bintime { + time_t sec; + uint64_t frac; +}; + +static __inline void +bintime_addx(struct bintime *_bt, uint64_t _x) +{ + uint64_t _u; + + _u = _bt->frac; + _bt->frac += _x; + if (_u > _bt->frac) + _bt->sec++; +} + +static __inline void +bintime_subx(struct bintime *_bt, uint64_t _x) +{ + uint64_t _u; + + _u = _bt->frac; + _bt->frac -= _x; + if (_u < _bt->frac) 
+ _bt->sec--; +} + +static __inline void +bintime_addns(struct bintime *bt, uint64_t ns) +{ + bt->sec += ns/ (uint64_t)NSEC_PER_SEC; + ns = ns % (uint64_t)NSEC_PER_SEC; + if (ns) { + /* 18446744073 = int(2^64 / NSEC_PER_SEC) */ + ns = ns * (uint64_t)18446744073LL; + bintime_addx(bt, ns); + } +} + +static __inline void +bintime_subns(struct bintime *bt, uint64_t ns) +{ + bt->sec -= ns/ (uint64_t)NSEC_PER_SEC; + ns = ns % (uint64_t)NSEC_PER_SEC; + if (ns) { + /* 18446744073 = int(2^64 / NSEC_PER_SEC) */ + ns = ns * (uint64_t)18446744073LL; + bintime_subx(bt, ns); + } +} + +static __inline void +bintime_addxns(struct bintime *bt, uint64_t a, int64_t xns) +{ + uint64_t uxns = (xns > 0)?(uint64_t )xns:(uint64_t)-xns; + uint64_t ns = multi_overflow(a, uxns); + if (xns > 0) { + if (ns) + bintime_addns(bt, ns); + ns = (a * uxns) / (uint64_t)NSEC_PER_SEC; + bintime_addx(bt, ns); + } + else{ + if (ns) + bintime_subns(bt, ns); + ns = (a * uxns) / (uint64_t)NSEC_PER_SEC; + bintime_subx(bt,ns); + } +} + + +static __inline void +bintime_add(struct bintime *_bt, const struct bintime *_bt2) +{ + uint64_t _u; + + _u = _bt->frac; + _bt->frac += _bt2->frac; + if (_u > _bt->frac) + _bt->sec++; + _bt->sec += _bt2->sec; +} + +static __inline void +bintime_sub(struct bintime *_bt, const struct bintime *_bt2) +{ + uint64_t _u; + + _u = _bt->frac; + _bt->frac -= _bt2->frac; + if (_u < _bt->frac) + _bt->sec--; + _bt->sec -= _bt2->sec; +} + +static __inline void +clock2bintime(const clock_sec_t *secs, const clock_usec_t *microsecs, struct bintime *_bt) +{ + + _bt->sec = *secs; + /* 18446744073709 = int(2^64 / 1000000) */ + _bt->frac = *microsecs * (uint64_t)18446744073709LL; +} + +static __inline void +bintime2usclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *microsecs) +{ + + *secs = _bt->sec; + *microsecs = ((uint64_t)USEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32; +} + +static __inline void +bintime2nsclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t 
*nanosecs) +{ + + *secs = _bt->sec; + *nanosecs = ((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32; +} + +static __inline void +bintime2absolutetime(const struct bintime *_bt, uint64_t *abs) +{ + uint64_t nsec; + nsec = (uint64_t) _bt->sec * (uint64_t)NSEC_PER_SEC + (((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32); + nanoseconds_to_absolutetime(nsec, abs); +} + +struct latched_time { + uint64_t monotonic_time_usec; + uint64_t mach_time; +}; + +extern int +kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); /* * Time of day (calendar) variables. * * Algorithm: * - * TOD <- (seconds + epoch, fraction) <- CONV(current absolute time + offset) + * TOD <- bintime + delta*scale * - * where CONV converts absolute time units into seconds and a fraction. + * where : + * bintime is a cumulative offset that includes boottime and scaled time elapsed between boottime and last scale update. + * delta is ticks elapsed since last scale update. + * scale is computed according to an adjustment provided by ntp_kern. 
*/ static struct clock_calend { - uint64_t epoch; - uint64_t offset; -} clock_calend; + uint64_t s_scale_ns; /* scale to apply for each second elapsed, it converts in ns */ + int64_t s_adj_nsx; /* additional adj to apply for each second elapsed, it is expressed in 64 bit frac of ns */ + uint64_t tick_scale_x; /* scale to apply for each tick elapsed, it converts in 64 bit frac of s */ + uint64_t offset_count; /* abs time from which apply current scales */ + struct bintime offset; /* cumulative offset expressed in (sec, 64 bits frac of a second) */ + struct bintime bintime; /* cumulative offset (it includes bootime) expressed in (sec, 64 bits frac of a second) */ + struct bintime boottime; /* boot time expressed in (sec, 64 bits frac of a second) */ + struct bintime basesleep; +} clock_calend; + +static uint64_t ticks_per_sec; /* ticks in a second (expressed in abs time) */ + +#if DEVELOPMENT || DEBUG +clock_sec_t last_utc_sec = 0; +clock_usec_t last_utc_usec = 0; +clock_sec_t max_utc_sec = 0; +clock_sec_t last_sys_sec = 0; +clock_usec_t last_sys_usec = 0; +#endif + +#if DEVELOPMENT || DEBUG +extern int g_should_log_clock_adjustments; + +static void print_all_clock_variables(const char*, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* calend_cp); +static void print_all_clock_variables_internal(const char *, struct clock_calend* calend_cp); +#else +#define print_all_clock_variables(...) do { } while (0) +#define print_all_clock_variables_internal(...) do { } while (0) +#endif + +#if CONFIG_DTRACE + /* - * Calendar adjustment variables and values. + * Unlocked calendar flipflop; this is used to track a clock_calend such + * that we can safely access a snapshot of a valid clock_calend structure + * without needing to take any locks to do it. 
+ * + * The trick is to use a generation count and set the low bit when it is + * being updated/read; by doing this, we guarantee, through use of the + * hw_atomic functions, that the generation is incremented when the bit + * is cleared atomically (by using a 1 bit add). */ -#define calend_adjperiod (NSEC_PER_SEC / 100) /* adjustment period, ns */ -#define calend_adjskew (40 * NSEC_PER_USEC) /* "standard" skew, ns / period */ -#define calend_adjbig (NSEC_PER_SEC) /* use 10x skew above adjbig ns */ - -static uint64_t calend_adjstart; /* Absolute time value for start of this adjustment period */ -static uint32_t calend_adjoffset; /* Absolute time offset for this adjustment period as absolute value */ - -static int32_t calend_adjdelta; /* Nanosecond time delta for this adjustment period */ -static int64_t calend_adjtotal; /* Nanosecond remaining total adjustment */ - -static uint64_t calend_adjdeadline; /* Absolute time value for next adjustment period */ -static uint32_t calend_adjinterval; /* Absolute time interval of adjustment period */ - -static timer_call_data_t calend_adjcall; -static uint32_t calend_adjactive; +static struct unlocked_clock_calend { + struct clock_calend calend; /* copy of calendar */ + uint32_t gen; /* generation count */ +} flipflop[ 2]; -static uint32_t calend_set_adjustment( - int32_t *secs, - int32_t *microsecs); +static void clock_track_calend_nowait(void); -static void calend_adjust_call(void); -static uint32_t calend_adjust(void); +#endif -static thread_call_data_t calend_wakecall; +void _clock_delay_until_deadline(uint64_t interval, uint64_t deadline); +void _clock_delay_until_deadline_with_leeway(uint64_t interval, uint64_t deadline, uint64_t leeway); -extern void IOKitResetTime(void); - -static uint64_t clock_boottime; /* Seconds boottime epoch */ +/* Boottime variables*/ +static uint64_t clock_boottime; +static uint32_t clock_boottime_usec; #define TIME_ADD(rsecs, secs, rfrac, frac, unit) \ MACRO_BEGIN \ @@ -108,7 +330,7 @@ 
MACRO_END #define TIME_SUB(rsecs, secs, rfrac, frac, unit) \ MACRO_BEGIN \ - if ((int32_t)((rfrac) -= (frac)) < 0) { \ + if ((int)((rfrac) -= (frac)) < 0) { \ (rfrac) += (unit); \ (rsecs) -= 1; \ } \ @@ -123,17 +345,19 @@ MACRO_END void clock_config(void) { - simple_lock_init(&clock_lock, 0); - timer_call_setup(&calend_adjcall, (timer_call_func_t)calend_adjust_call, NULL); - thread_call_setup(&calend_wakecall, (thread_call_func_t)IOKitResetTime, NULL); + clock_lock_init(); + + settime_lock_grp_attr = lck_grp_attr_alloc_init(); + settime_lock_grp = lck_grp_alloc_init("settime grp", settime_lock_grp_attr); + settime_lock_attr = lck_attr_alloc_init(); + lck_mtx_init(&settime_lock, settime_lock_grp, settime_lock_attr); clock_oldconfig(); - /* - * Initialize the timer callouts. - */ - timer_call_initialize(); + ntp_init(); + + nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec); } /* @@ -160,8 +384,8 @@ clock_timebase_init(void) { uint64_t abstime; - nanoseconds_to_absolutetime(calend_adjperiod, &abstime); - calend_adjinterval = abstime; + nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &abstime); + hz_tick_interval = (uint32_t)abstime; sched_timebase_init(); } @@ -176,7 +400,7 @@ mach_timebase_info_trap( struct mach_timebase_info_trap_args *args) { mach_vm_address_t out_info_addr = args->info; - mach_timebase_info_data_t info; + mach_timebase_info_data_t info = {}; clock_timebase_info(&info); @@ -197,37 +421,213 @@ mach_timebase_info_trap( */ void clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs) + clock_sec_t *secs, + clock_usec_t *microsecs) { - uint64_t now; - spl_t s; + clock_get_calendar_absolute_and_microtime(secs, microsecs, NULL); +} - s = splclock(); - simple_lock(&clock_lock); +/* + * get_scale_factors_from_adj: + * + * computes scale factors from the value given in adjustment. + * + * Part of the code has been taken from tc_windup of FreeBSD + * written by Poul-Henning Kamp , Julien Ridoux and + * Konstantin Belousov. 
+ * https://github.com/freebsd/freebsd/blob/master/sys/kern/kern_tc.c + */ +static void +get_scale_factors_from_adj(int64_t adjustment, uint64_t* tick_scale_x, uint64_t* s_scale_ns, int64_t* s_adj_nsx) +{ + uint64_t scale; + int64_t nano, frac; + + /*- + * Calculating the scaling factor. We want the number of 1/2^64 + * fractions of a second per period of the hardware counter, taking + * into account the th_adjustment factor which the NTP PLL/adjtime(2) + * processing provides us with. + * + * The th_adjustment is nanoseconds per second with 32 bit binary + * fraction and we want 64 bit binary fraction of second: + * + * x = a * 2^32 / 10^9 = a * 4.294967296 + * + * The range of th_adjustment is +/- 5000PPM so inside a 64bit int + * we can only multiply by about 850 without overflowing, that + * leaves no suitably precise fractions for multiply before divide. + * + * Divide before multiply with a fraction of 2199/512 results in a + * systematic undercompensation of 10PPM of th_adjustment. On a + * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. + * + * We happily sacrifice the lowest of the 64 bits of our result + * to the goddess of code clarity. + * + */ + scale = (uint64_t)1 << 63; + scale += (adjustment / 1024) * 2199; + scale /= ticks_per_sec; + *tick_scale_x = scale * 2; - now = mach_absolute_time(); + /* + * hi part of adj + * it contains ns (without fraction) to add to the next sec. + * Get ns scale factor for the next sec. + */ + nano = (adjustment > 0)? adjustment >> 32 : -((-adjustment) >> 32); + scale = (uint64_t) NSEC_PER_SEC; + scale += nano; + *s_scale_ns = scale; + + /* + * lo part of adj + * it contains 32 bit frac of ns to add to the next sec. + * Keep it as additional adjustment for the next sec. + */ + frac = (adjustment > 0)? ((uint32_t) adjustment) : -((uint32_t) (-adjustment)); + *s_adj_nsx = (frac>0)? 
frac << 32 : -( (-frac) << 32); + + return; +} - if (calend_adjdelta < 0) { - uint32_t t32; +/* + * scale_delta: + * + * returns a bintime struct representing delta scaled accordingly to the + * scale factors provided to this function. + */ +static struct bintime +scale_delta(uint64_t delta, uint64_t tick_scale_x, uint64_t s_scale_ns, int64_t s_adj_nsx) +{ + uint64_t sec, new_ns, over; + struct bintime bt; - if (now > calend_adjstart) { - t32 = now - calend_adjstart; + bt.sec = 0; + bt.frac = 0; - if (t32 > calend_adjoffset) - now -= calend_adjoffset; - else - now = calend_adjstart; + /* + * If more than one second is elapsed, + * scale fully elapsed seconds using scale factors for seconds. + * s_scale_ns -> scales sec to ns. + * s_adj_nsx -> additional adj expressed in 64 bit frac of ns to apply to each sec. + */ + if (delta > ticks_per_sec) { + sec = (delta/ticks_per_sec); + new_ns = sec * s_scale_ns; + bintime_addns(&bt, new_ns); + if (s_adj_nsx) { + if (sec == 1) { + /* shortcut, no overflow can occur */ + if (s_adj_nsx > 0) + bintime_addx(&bt, (uint64_t)s_adj_nsx/ (uint64_t)NSEC_PER_SEC); + else + bintime_subx(&bt, (uint64_t)-s_adj_nsx/ (uint64_t)NSEC_PER_SEC); + } + else{ + /* + * s_adj_nsx is 64 bit frac of ns. + * sec*s_adj_nsx might overflow in int64_t. + * use bintime_addxns to not lose overflowed ns. + */ + bintime_addxns(&bt, sec, s_adj_nsx); + } } + delta = (delta % ticks_per_sec); + } + + over = multi_overflow(tick_scale_x, delta); + if(over){ + bt.sec += over; } - now += clock_calend.offset; + /* + * scale elapsed ticks using the scale factor for ticks. + */ + bintime_addx(&bt, delta * tick_scale_x); + + return bt; +} + +/* + * get_scaled_time: + * + * returns the scaled time of the time elapsed from the last time + * scale factors were updated to now. + */ +static struct bintime +get_scaled_time(uint64_t now) +{ + uint64_t delta; + + /* + * Compute ticks elapsed since last scale update. 
+ * This time will be scaled according to the value given by ntp kern. + */ + delta = now - clock_calend.offset_count; + + return scale_delta(delta, clock_calend.tick_scale_x, clock_calend.s_scale_ns, clock_calend.s_adj_nsx); +} + +static void +clock_get_calendar_absolute_and_microtime_locked( + clock_sec_t *secs, + clock_usec_t *microsecs, + uint64_t *abstime) +{ + uint64_t now; + struct bintime bt; + + now = mach_absolute_time(); + if (abstime) + *abstime = now; + + bt = get_scaled_time(now); + bintime_add(&bt, &clock_calend.bintime); + bintime2usclock(&bt, secs, microsecs); +} + +static void +clock_get_calendar_absolute_and_nanotime_locked( + clock_sec_t *secs, + clock_usec_t *nanosecs, + uint64_t *abstime) +{ + uint64_t now; + struct bintime bt; + + now = mach_absolute_time(); + if (abstime) + *abstime = now; + + bt = get_scaled_time(now); + bintime_add(&bt, &clock_calend.bintime); + bintime2nsclock(&bt, secs, nanosecs); +} + +/* + * clock_get_calendar_absolute_and_microtime: + * + * Returns the current calendar value, + * microseconds as the fraction. Also + * returns mach_absolute_time if abstime + * is not NULL. 
+ */ +void +clock_get_calendar_absolute_and_microtime( + clock_sec_t *secs, + clock_usec_t *microsecs, + uint64_t *abstime) +{ + spl_t s; - absolutetime_to_microtime(now, secs, microsecs); + s = splclock(); + clock_lock(); - *secs += clock_calend.epoch; + clock_get_calendar_absolute_and_microtime_locked(secs, microsecs, abstime); - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } @@ -243,38 +643,17 @@ clock_get_calendar_microtime( */ void clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs) + clock_sec_t *secs, + clock_nsec_t *nanosecs) { - uint64_t now; spl_t s; s = splclock(); - simple_lock(&clock_lock); - - now = mach_absolute_time(); - - if (calend_adjdelta < 0) { - uint32_t t32; + clock_lock(); - if (now > calend_adjstart) { - t32 = now - calend_adjstart; + clock_get_calendar_absolute_and_nanotime_locked(secs, nanosecs, NULL); - if (t32 > calend_adjoffset) - now -= calend_adjoffset; - else - now = calend_adjstart; - } - } - - now += clock_calend.offset; - - absolutetime_to_microtime(now, secs, nanosecs); - *nanosecs *= NSEC_PER_USEC; - - *secs += clock_calend.epoch; - - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } @@ -291,41 +670,56 @@ clock_get_calendar_nanotime( */ void clock_gettimeofday( - uint32_t *secs, - uint32_t *microsecs) + clock_sec_t *secs, + clock_usec_t *microsecs) +{ + clock_gettimeofday_and_absolute_time(secs, microsecs, NULL); +} + +void +clock_gettimeofday_and_absolute_time( + clock_sec_t *secs, + clock_usec_t *microsecs, + uint64_t *mach_time) { uint64_t now; spl_t s; + struct bintime bt; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); now = mach_absolute_time(); + bt = get_scaled_time(now); + bintime_add(&bt, &clock_calend.bintime); + bintime2usclock(&bt, secs, microsecs); - if (calend_adjdelta >= 0) { - clock_gettimeofday_set_commpage(now, clock_calend.epoch, clock_calend.offset, secs, microsecs); - } - else { - uint32_t t32; - - if (now > calend_adjstart) { - t32 = now - calend_adjstart; - - 
if (t32 > calend_adjoffset) - now -= calend_adjoffset; - else - now = calend_adjstart; - } - - now += clock_calend.offset; + clock_gettimeofday_set_commpage(now, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec); - absolutetime_to_microtime(now, secs, microsecs); + clock_unlock(); + splx(s); - *secs += clock_calend.epoch; + if (mach_time) { + *mach_time = now; } +} - simple_unlock(&clock_lock); - splx(s); +static void +update_basesleep(struct bintime delta, bool forward) +{ + /* + * Update basesleep only if the platform does not have monotonic clock. + * In that case the sleep time computation will use the PMU time + * whose offset gets modified by settimeofday. + * We don't need this for monotonic clock because in that case the sleep + * time computation is independent from the offset value of the PMU. + */ + if (!has_monotonic_clock) { + if (forward) + bintime_add(&clock_calend.basesleep, &delta); + else + bintime_sub(&clock_calend.basesleep, &delta); + } } /* @@ -342,305 +736,681 @@ clock_gettimeofday( */ void clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs) + clock_sec_t secs, + clock_usec_t microsecs) { - uint32_t sys, microsys; - uint32_t newsecs; + uint64_t absolutesys; + clock_sec_t newsecs; + clock_sec_t oldsecs; + clock_usec_t newmicrosecs; + clock_usec_t oldmicrosecs; + uint64_t commpage_value; spl_t s; + struct bintime bt; + clock_sec_t deltasecs; + clock_usec_t deltamicrosecs; - newsecs = (microsecs < 500*USEC_PER_SEC)? - secs: secs + 1; - - s = splclock(); - simple_lock(&clock_lock); - - commpage_set_timestamp(0,0,0); + newsecs = secs; + newmicrosecs = microsecs; /* - * Calculate the new calendar epoch based on - * the new value and the system clock. + * settime_lock mtx is used to avoid that racing settimeofdays update the wall clock and + * the platform clock concurrently. 
+ * + * clock_lock cannot be used for this race because it is acquired from interrupt context + * and it needs interrupts disabled while instead updating the platform clock needs to be + * called with interrupts enabled. */ - clock_get_system_microtime(&sys, &microsys); - TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); + lck_mtx_lock(&settime_lock); + + s = splclock(); + clock_lock(); + +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp = clock_calend; +#endif + commpage_disable_timestamp(); /* * Adjust the boottime based on the delta. */ - clock_boottime += secs - clock_calend.epoch; + clock_get_calendar_absolute_and_microtime_locked(&oldsecs, &oldmicrosecs, &absolutesys); + +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n", + __func__, (unsigned long)oldsecs, oldmicrosecs, absolutesys); + os_log(OS_LOG_DEFAULT, "%s requested %lu s %d u\n", + __func__, (unsigned long)secs, microsecs ); + } +#endif - /* - * Set the new calendar epoch. - */ - clock_calend.epoch = secs; - nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset); + if (oldsecs < secs || (oldsecs == secs && oldmicrosecs < microsecs)) { + // moving forwards + deltasecs = secs; + deltamicrosecs = microsecs; - /* - * Cancel any adjustment in progress. 
- */ - calend_adjdelta = calend_adjtotal = 0; + TIME_SUB(deltasecs, oldsecs, deltamicrosecs, oldmicrosecs, USEC_PER_SEC); + +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s delta requested %lu s %d u\n", + __func__, (unsigned long)deltasecs, deltamicrosecs); + } +#endif + + TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC); + clock2bintime(&deltasecs, &deltamicrosecs, &bt); + bintime_add(&clock_calend.boottime, &bt); + update_basesleep(bt, TRUE); + } else { + // moving backwards + deltasecs = oldsecs; + deltamicrosecs = oldmicrosecs; + + TIME_SUB(deltasecs, secs, deltamicrosecs, microsecs, USEC_PER_SEC); +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s negative delta requested %lu s %d u\n", + __func__, (unsigned long)deltasecs, deltamicrosecs); + } +#endif + + TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC); + clock2bintime(&deltasecs, &deltamicrosecs, &bt); + bintime_sub(&clock_calend.boottime, &bt); + update_basesleep(bt, FALSE); + } + + clock_calend.bintime = clock_calend.boottime; + bintime_add(&clock_calend.bintime, &clock_calend.offset); - simple_unlock(&clock_lock); + clock2bintime((clock_sec_t *) &secs, (clock_usec_t *) &microsecs, &bt); + + clock_gettimeofday_set_commpage(absolutesys, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec); + +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp1 = clock_calend; +#endif + + commpage_value = clock_boottime * USEC_PER_SEC + clock_boottime_usec; + + clock_unlock(); + splx(s); /* * Set the new value for the platform clock. + * This call might block, so interrupts must be enabled. 
*/ - PESetGMTTimeOfDay(newsecs); +#if DEVELOPMENT || DEBUG + uint64_t now_b = mach_absolute_time(); +#endif - splx(s); + PESetUTCTimeOfDay(newsecs, newmicrosecs); + +#if DEVELOPMENT || DEBUG + uint64_t now_a = mach_absolute_time(); + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s mach bef PESet %llu mach aft %llu \n", __func__, now_b, now_a); + } +#endif + + print_all_clock_variables_internal(__func__, &clock_calend_cp); + print_all_clock_variables_internal(__func__, &clock_calend_cp1); + + commpage_update_boottime(commpage_value); /* * Send host notifications. */ host_notify_calendar_change(); + host_notify_calendar_set(); + +#if CONFIG_DTRACE + clock_track_calend_nowait(); +#endif + + lck_mtx_unlock(&settime_lock); } -/* - * clock_initialize_calendar: - * - * Set the calendar and related clocks - * from the platform clock at boot or - * wake event. - * - * Also sends host notifications. - */ +uint64_t mach_absolutetime_asleep = 0; +uint64_t mach_absolutetime_last_sleep = 0; + void -clock_initialize_calendar(void) +clock_get_calendar_uptime(clock_sec_t *secs) { - uint32_t sys, microsys; - uint32_t microsecs = 0, secs = PEGetGMTTimeOfDay(); - spl_t s; + uint64_t now; + spl_t s; + struct bintime bt; s = splclock(); - simple_lock(&clock_lock); - - commpage_set_timestamp(0,0,0); + clock_lock(); - if ((int32_t)secs >= (int32_t)clock_boottime) { - /* - * Initialize the boot time based on the platform clock. - */ - if (clock_boottime == 0) - clock_boottime = secs; - - /* - * Calculate the new calendar epoch based on - * the platform clock and the system clock. - */ - clock_get_system_microtime(&sys, &microsys); - TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); + now = mach_absolute_time(); - /* - * Set the new calendar epoch. 
- */ - clock_calend.epoch = secs; - nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset); + bt = get_scaled_time(now); + bintime_add(&bt, &clock_calend.offset); - /* - * Cancel any adjustment in progress. - */ - calend_adjdelta = calend_adjtotal = 0; - } + *secs = bt.sec; - simple_unlock(&clock_lock); + clock_unlock(); splx(s); - - /* - * Send host notifications. - */ - host_notify_calendar_change(); } -/* - * clock_get_boottime_nanotime: - * - * Return the boottime, used by sysctl. - */ -void -clock_get_boottime_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - *secs = clock_boottime; - *nanosecs = 0; -} /* - * clock_adjtime: + * clock_update_calendar: * - * Interface to adjtime() syscall. - * - * Calculates adjustment variables and - * initiates adjustment. + * called by ntp timer to update scale factors. */ void -clock_adjtime( - int32_t *secs, - int32_t *microsecs) +clock_update_calendar(void) { - uint32_t interval; - spl_t s; + + uint64_t now, delta; + struct bintime bt; + spl_t s; + int64_t adjustment; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); + + now = mach_absolute_time(); + + /* + * scale the time elapsed since the last update and + * add it to offset. + */ + bt = get_scaled_time(now); + bintime_add(&clock_calend.offset, &bt); + + /* + * update the base from which apply next scale factors. + */ + delta = now - clock_calend.offset_count; + clock_calend.offset_count += delta; - interval = calend_set_adjustment(secs, microsecs); - if (interval != 0) { - calend_adjdeadline = mach_absolute_time() + interval; - if (!timer_call_enter(&calend_adjcall, calend_adjdeadline)) - calend_adjactive++; + clock_calend.bintime = clock_calend.offset; + bintime_add(&clock_calend.bintime, &clock_calend.boottime); + + /* + * recompute next adjustment. 
+ */ + ntp_update_second(&adjustment, clock_calend.bintime.sec); + +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s adjustment %lld\n", __func__, adjustment); } - else - if (timer_call_cancel(&calend_adjcall)) - calend_adjactive--; +#endif + + /* + * recomputing scale factors. + */ + get_scale_factors_from_adj(adjustment, &clock_calend.tick_scale_x, &clock_calend.s_scale_ns, &clock_calend.s_adj_nsx); + + clock_gettimeofday_set_commpage(now, clock_calend.bintime.sec, clock_calend.bintime.frac, clock_calend.tick_scale_x, ticks_per_sec); + +#if DEVELOPMENT || DEBUG + struct clock_calend calend_cp = clock_calend; +#endif - simple_unlock(&clock_lock); + clock_unlock(); splx(s); + + print_all_clock_variables(__func__, NULL,NULL,NULL,NULL, &calend_cp); } -static uint32_t -calend_set_adjustment( - int32_t *secs, - int32_t *microsecs) -{ - uint64_t now, t64; - int64_t total, ototal; - uint32_t interval = 0; - total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; +#if DEVELOPMENT || DEBUG + +void print_all_clock_variables_internal(const char* func, struct clock_calend* clock_calend_cp) +{ + clock_sec_t offset_secs; + clock_usec_t offset_microsecs; + clock_sec_t bintime_secs; + clock_usec_t bintime_microsecs; + clock_sec_t bootime_secs; + clock_usec_t bootime_microsecs; + + if (!g_should_log_clock_adjustments) + return; + + bintime2usclock(&clock_calend_cp->offset, &offset_secs, &offset_microsecs); + bintime2usclock(&clock_calend_cp->bintime, &bintime_secs, &bintime_microsecs); + bintime2usclock(&clock_calend_cp->boottime, &bootime_secs, &bootime_microsecs); + + os_log(OS_LOG_DEFAULT, "%s s_scale_ns %llu s_adj_nsx %lld tick_scale_x %llu offset_count %llu\n", + func , clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx, + clock_calend_cp->tick_scale_x, clock_calend_cp->offset_count); + os_log(OS_LOG_DEFAULT, "%s offset.sec %ld offset.frac %llu offset_secs %lu offset_microsecs %d\n", + func, 
clock_calend_cp->offset.sec, clock_calend_cp->offset.frac, + (unsigned long)offset_secs, offset_microsecs); + os_log(OS_LOG_DEFAULT, "%s bintime.sec %ld bintime.frac %llu bintime_secs %lu bintime_microsecs %d\n", + func, clock_calend_cp->bintime.sec, clock_calend_cp->bintime.frac, + (unsigned long)bintime_secs, bintime_microsecs); + os_log(OS_LOG_DEFAULT, "%s bootime.sec %ld bootime.frac %llu bootime_secs %lu bootime_microsecs %d\n", + func, clock_calend_cp->boottime.sec, clock_calend_cp->boottime.frac, + (unsigned long)bootime_secs, bootime_microsecs); + + clock_sec_t basesleep_secs; + clock_usec_t basesleep_microsecs; + + bintime2usclock(&clock_calend_cp->basesleep, &basesleep_secs, &basesleep_microsecs); + os_log(OS_LOG_DEFAULT, "%s basesleep.sec %ld basesleep.frac %llu basesleep_secs %lu basesleep_microsecs %d\n", + func, clock_calend_cp->basesleep.sec, clock_calend_cp->basesleep.frac, + (unsigned long)basesleep_secs, basesleep_microsecs); - commpage_set_timestamp(0,0,0); +} - now = mach_absolute_time(); - ototal = calend_adjtotal; +void print_all_clock_variables(const char* func, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* clock_calend_cp) +{ + if (!g_should_log_clock_adjustments) + return; - if (total != 0) { - int32_t delta = calend_adjskew; + struct bintime bt; + clock_sec_t wall_secs; + clock_usec_t wall_microsecs; + uint64_t now; + uint64_t delta; - if (total > 0) { - if (total > calend_adjbig) - delta *= 10; - if (delta > total) - delta = total; + if (pmu_secs) { + os_log(OS_LOG_DEFAULT, "%s PMU %lu s %d u \n", func, (unsigned long)*pmu_secs, *pmu_usec); + } + if (sys_secs) { + os_log(OS_LOG_DEFAULT, "%s sys %lu s %d u \n", func, (unsigned long)*sys_secs, *sys_usec); + } - nanoseconds_to_absolutetime((uint64_t)delta, &t64); - calend_adjoffset = t64; - } - else { - if (total < -calend_adjbig) - delta *= 10; - delta = -delta; - if (delta < total) - delta = total; + 
print_all_clock_variables_internal(func, clock_calend_cp); - calend_adjstart = now; + now = mach_absolute_time(); + delta = now - clock_calend_cp->offset_count; - nanoseconds_to_absolutetime((uint64_t)-delta, &t64); - calend_adjoffset = t64; - } + bt = scale_delta(delta, clock_calend_cp->tick_scale_x, clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx); + bintime_add(&bt, &clock_calend_cp->bintime); + bintime2usclock(&bt, &wall_secs, &wall_microsecs); - calend_adjtotal = total; - calend_adjdelta = delta; + os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n", + func, (unsigned long)wall_secs, wall_microsecs, now); +} - interval = calend_adjinterval; - } - else - calend_adjdelta = calend_adjtotal = 0; - if (ototal != 0) { - *secs = ototal / NSEC_PER_SEC; - *microsecs = (ototal % NSEC_PER_SEC) / NSEC_PER_USEC; - } - else - *secs = *microsecs = 0; +#endif /* DEVELOPMENT || DEBUG */ - return (interval); -} -static void -calend_adjust_call(void) +/* + * clock_initialize_calendar: + * + * Set the calendar and related clocks + * from the platform clock at boot. + * + * Also sends host notifications. 
+ */ +void +clock_initialize_calendar(void) { - uint32_t interval; - spl_t s; + clock_sec_t sys; // sleepless time since boot in seconds + clock_sec_t secs; // Current UTC time + clock_sec_t utc_offset_secs; // Difference in current UTC time and sleepless time since boot + clock_usec_t microsys; + clock_usec_t microsecs; + clock_usec_t utc_offset_microsecs; + spl_t s; + struct bintime bt; + struct bintime monotonic_bt; + struct latched_time monotonic_time; + uint64_t monotonic_usec_total; + clock_sec_t sys2, monotonic_sec; + clock_usec_t microsys2, monotonic_usec; + size_t size; + + //Get PMU time with offset and corresponding sys time + PEGetUTCTimeOfDay(&secs, &microsecs); + clock_get_system_microtime(&sys, &microsys); + + /* + * If the platform has a monotonic clock, use kern.monotonicclock_usecs + * to estimate the sleep/wake time, otherwise use the PMU and adjustments + * provided through settimeofday to estimate the sleep time. + * NOTE: the latter case relies on the kernel being the only component + * to set the PMU offset. 
+ */ + size = sizeof(monotonic_time); + if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) { + has_monotonic_clock = 0; + os_log(OS_LOG_DEFAULT, "%s system does not have monotonic clock.\n", __func__); + } else { + has_monotonic_clock = 1; + monotonic_usec_total = monotonic_time.monotonic_time_usec; + absolutetime_to_microtime(monotonic_time.mach_time, &sys2, &microsys2); + os_log(OS_LOG_DEFAULT, "%s system has monotonic clock.\n", __func__); + } s = splclock(); - simple_lock(&clock_lock); + clock_lock(); - if (--calend_adjactive == 0) { - interval = calend_adjust(); - if (interval != 0) { - clock_deadline_for_periodic_event(interval, mach_absolute_time(), - &calend_adjdeadline); + commpage_disable_timestamp(); - if (!timer_call_enter(&calend_adjcall, calend_adjdeadline)) - calend_adjactive++; - } + utc_offset_secs = secs; + utc_offset_microsecs = microsecs; + +#if DEVELOPMENT || DEBUG + last_utc_sec = secs; + last_utc_usec = microsecs; + last_sys_sec = sys; + last_sys_usec = microsys; + if (secs > max_utc_sec) + max_utc_sec = secs; +#endif + + /* + * We normally expect the UTC clock to be always-on and produce + * greater readings than the tick counter. There may be corner cases + * due to differing clock resolutions (UTC clock is likely lower) and + * errors reading the UTC clock (some implementations return 0 + * on error) in which that doesn't hold true. Bring the UTC measurements + * in-line with the tick counter measurements as a best effort in that case. 
+ */ + //FIXME if the current time is prior to 1970, secs will be negative + if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) { + os_log(OS_LOG_DEFAULT, "%s WARNING: PMU offset is less then sys PMU %lu s %d u sys %lu s %d u\n", + __func__, (unsigned long) secs, microsecs, (unsigned long)sys, microsys); + secs = utc_offset_secs = sys; + microsecs = utc_offset_microsecs = microsys; + } + + // PMU time with offset - sys + // This macro stores the subtraction result in utc_offset_secs and utc_offset_microsecs + TIME_SUB(utc_offset_secs, sys, utc_offset_microsecs, microsys, USEC_PER_SEC); + + clock2bintime(&utc_offset_secs, &utc_offset_microsecs, &bt); + + /* + * Initialize the boot time based on the platform clock. + */ + clock_boottime = secs; + clock_boottime_usec = microsecs; + commpage_update_boottime(clock_boottime * USEC_PER_SEC + clock_boottime_usec); + + nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec); + clock_calend.boottime = bt; + clock_calend.bintime = bt; + clock_calend.offset.sec = 0; + clock_calend.offset.frac = 0; + + clock_calend.tick_scale_x = (uint64_t)1 << 63; + clock_calend.tick_scale_x /= ticks_per_sec; + clock_calend.tick_scale_x *= 2; + + clock_calend.s_scale_ns = NSEC_PER_SEC; + clock_calend.s_adj_nsx = 0; + + if (has_monotonic_clock) { + + monotonic_sec = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC; + monotonic_usec = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC; + + // PMU time without offset - sys + // This macro stores the subtraction result in monotonic_sec and monotonic_usec + TIME_SUB(monotonic_sec, sys2, monotonic_usec, microsys2, USEC_PER_SEC); + clock2bintime(&monotonic_sec, &monotonic_usec, &monotonic_bt); + + // set the basesleep as the difference between monotonic clock - sys + clock_calend.basesleep = monotonic_bt; + } else { + // set the basesleep as the difference between PMU clock - sys + clock_calend.basesleep = bt; } + commpage_update_mach_continuous_time(mach_absolutetime_asleep); + 
+#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp = clock_calend; +#endif - simple_unlock(&clock_lock); + clock_unlock(); splx(s); + + print_all_clock_variables(__func__, &secs, &microsecs, &sys, &microsys, &clock_calend_cp); + + /* + * Send host notifications. + */ + host_notify_calendar_change(); + +#if CONFIG_DTRACE + clock_track_calend_nowait(); +#endif } -static uint32_t -calend_adjust(void) -{ - uint64_t now, t64; - int32_t delta; - uint32_t interval = 0; - commpage_set_timestamp(0,0,0); +void +clock_wakeup_calendar(void) +{ + clock_sec_t sys; + clock_sec_t secs; + clock_usec_t microsys; + clock_usec_t microsecs; + spl_t s; + struct bintime bt, last_sleep_bt; + clock_sec_t basesleep_s, last_sleep_sec; + clock_usec_t basesleep_us, last_sleep_usec; + struct latched_time monotonic_time; + uint64_t monotonic_usec_total; + size_t size; + clock_sec_t secs_copy; + clock_usec_t microsecs_copy; +#if DEVELOPMENT || DEBUG + clock_sec_t utc_sec; + clock_usec_t utc_usec; + PEGetUTCTimeOfDay(&utc_sec, &utc_usec); +#endif - now = mach_absolute_time(); + /* + * If the platform has the monotonic clock use that to + * compute the sleep time. The monotonic clock does not have an offset + * that can be modified, so neither kernel nor userspace can change the time + * of this clock, it can only monotonically increase over time. + * During sleep mach_absolute_time does not tick, + * so the sleep time is the difference between the current monotonic time + * less the absolute time and the previous difference stored at wake time. + * + * basesleep = monotonic - sys ---> computed at last wake + * sleep_time = (monotonic - sys) - basesleep + * + * If the platform does not support monotonic time we use the PMU time + * to compute the last sleep. + * The PMU time is the monotonic clock + an offset that can be set + * by kernel. + * + * IMPORTANT: + * We assume that only the kernel is setting the offset of the PMU and that + * it is doing it only through the settimeofday interface. 
+ * + * basesleep is the difference between the PMU time and the mach_absolute_time + * at wake. + * During awake time settimeofday can change the PMU offset by a delta, + * and basesleep is shifted by the same delta applied to the PMU. So the sleep + * time computation becomes: + * + * PMU = monotonic + PMU_offset + * basesleep = PMU - sys ---> computed at last wake + * basesleep += settimeofday_delta + * PMU_offset += settimeofday_delta + * sleep_time = (PMU - sys) - basesleep + */ + if (has_monotonic_clock) { + //Get monotonic time with corresponding sys time + size = sizeof(monotonic_time); + if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) { + panic("%s: could not call kern.monotonicclock_usecs", __func__); + } + monotonic_usec_total = monotonic_time.monotonic_time_usec; + absolutetime_to_microtime(monotonic_time.mach_time, &sys, &microsys); + + secs = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC; + microsecs = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC; + } else { + //Get PMU time with offset and corresponding sys time + PEGetUTCTimeOfDay(&secs, &microsecs); + clock_get_system_microtime(&sys, &microsys); - delta = calend_adjdelta; + } - if (delta > 0) { - clock_calend.offset += calend_adjoffset; + s = splclock(); + clock_lock(); + + commpage_disable_timestamp(); + + secs_copy = secs; + microsecs_copy = microsecs; + +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp1 = clock_calend; +#endif /* DEVELOPMENT || DEBUG */ + +#if DEVELOPMENT || DEBUG + last_utc_sec = secs; + last_utc_usec = microsecs; + last_sys_sec = sys; + last_sys_usec = microsys; + if (secs > max_utc_sec) + max_utc_sec = secs; +#endif + /* + * We normally expect the UTC clock to be always-on and produce + * greater readings than the tick counter. 
There may be corner cases + * due to differing clock resolutions (UTC clock is likely lower) and + * errors reading the UTC clock (some implementations return 0 + * on error) in which that doesn't hold true. Bring the UTC measurements + * in-line with the tick counter measurements as a best effort in that case. + */ + //FIXME if the current time is prior to 1970, secs will be negative + if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) { + os_log(OS_LOG_DEFAULT, "%s WARNING: %s is less then sys %s %lu s %d u sys %lu s %d u\n", + __func__, (has_monotonic_clock)?"monotonic":"PMU", (has_monotonic_clock)?"monotonic":"PMU", (unsigned long)secs, microsecs, (unsigned long)sys, microsys); + secs = sys; + microsecs = microsys; + } - calend_adjtotal -= delta; - if (delta > calend_adjtotal) { - calend_adjdelta = delta = calend_adjtotal; + // PMU or monotonic - sys + // This macro stores the subtraction result in secs and microsecs + TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); + clock2bintime(&secs, &microsecs, &bt); - nanoseconds_to_absolutetime((uint64_t)delta, &t64); - calend_adjoffset = t64; - } + /* + * Safety belt: the UTC clock will likely have a lower resolution than the tick counter. + * It's also possible that the device didn't fully transition to the powered-off state on + * the most recent sleep, so the tick counter may not have reset or may have only briefly + * turned off. In that case it's possible for the difference between the UTC clock and the + * tick counter to be less than the previously recorded value in clock_calend.basesleep. + * In that case simply record that we slept for 0 ticks. 
+ */ + if ((bt.sec > clock_calend.basesleep.sec) || + ((bt.sec == clock_calend.basesleep.sec) && (bt.frac > clock_calend.basesleep.frac))) { + + //last_sleep is the difference between current PMU or monotonic - abs and last wake PMU or monotonic - abs + last_sleep_bt = bt; + bintime_sub(&last_sleep_bt, &clock_calend.basesleep); + + //set baseseep to current PMU or monotonic - abs + clock_calend.basesleep = bt; + bintime2usclock(&last_sleep_bt, &last_sleep_sec, &last_sleep_usec); + bintime2absolutetime(&last_sleep_bt, &mach_absolutetime_last_sleep); + mach_absolutetime_asleep += mach_absolutetime_last_sleep; + + bintime_add(&clock_calend.offset, &last_sleep_bt); + bintime_add(&clock_calend.bintime, &last_sleep_bt); + + } else{ + mach_absolutetime_last_sleep = 0; + last_sleep_sec = last_sleep_usec = 0; + bintime2usclock(&clock_calend.basesleep, &basesleep_s, &basesleep_us); + os_log(OS_LOG_DEFAULT, "%s WARNING: basesleep (%lu s %d u) > %s-sys (%lu s %d u) \n", + __func__, (unsigned long) basesleep_s, basesleep_us, (has_monotonic_clock)?"monotonic":"PMU", (unsigned long) secs_copy, microsecs_copy ); } - else - if (delta < 0) { - clock_calend.offset -= calend_adjoffset; - calend_adjtotal -= delta; - if (delta < calend_adjtotal) { - calend_adjdelta = delta = calend_adjtotal; + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE, + (uintptr_t) mach_absolutetime_last_sleep, + (uintptr_t) mach_absolutetime_asleep, + (uintptr_t) (mach_absolutetime_last_sleep >> 32), + (uintptr_t) (mach_absolutetime_asleep >> 32), + 0); - nanoseconds_to_absolutetime((uint64_t)-delta, &t64); - calend_adjoffset = t64; - } + commpage_update_mach_continuous_time(mach_absolutetime_asleep); + adjust_cont_time_thread_calls(); - if (calend_adjdelta != 0) - calend_adjstart = now; +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp = clock_calend; +#endif + + clock_unlock(); + splx(s); + +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + 
os_log(OS_LOG_DEFAULT, "PMU was %lu s %d u\n",(unsigned long) utc_sec, utc_usec); + os_log(OS_LOG_DEFAULT, "last sleep was %lu s %d u\n",(unsigned long) last_sleep_sec, last_sleep_usec); + print_all_clock_variables("clock_wakeup_calendar:BEFORE", + &secs_copy, &microsecs_copy, &sys, &microsys, &clock_calend_cp1); + print_all_clock_variables("clock_wakeup_calendar:AFTER", NULL, NULL, NULL, NULL, &clock_calend_cp); } +#endif /* DEVELOPMENT || DEBUG */ - if (calend_adjdelta != 0) - interval = calend_adjinterval; + host_notify_calendar_change(); - return (interval); +#if CONFIG_DTRACE + clock_track_calend_nowait(); +#endif } + /* - * clock_wakeup_calendar: + * clock_get_boottime_nanotime: * - * Interface to power management, used - * to initiate the reset of the calendar - * on wake from sleep event. + * Return the boottime, used by sysctl. */ void -clock_wakeup_calendar(void) +clock_get_boottime_nanotime( + clock_sec_t *secs, + clock_nsec_t *nanosecs) { - thread_call_enter(&calend_wakecall); + spl_t s; + + s = splclock(); + clock_lock(); + + *secs = (clock_sec_t)clock_boottime; + *nanosecs = (clock_nsec_t)clock_boottime_usec * NSEC_PER_USEC; + + clock_unlock(); + splx(s); } +/* + * clock_get_boottime_microtime: + * + * Return the boottime, used by sysctl. + */ +void +clock_get_boottime_microtime( + clock_sec_t *secs, + clock_usec_t *microsecs) +{ + spl_t s; + + s = splclock(); + clock_lock(); + + *secs = (clock_sec_t)clock_boottime; + *microsecs = (clock_nsec_t)clock_boottime_usec; + + clock_unlock(); + splx(s); +} + + /* * Wait / delay routines. 
*/ @@ -653,6 +1423,15 @@ mach_wait_until_continue( /*NOTREACHED*/ } +/* + * mach_wait_until_trap: Suspend execution of calling thread until the specified time has passed + * + * Parameters: args->deadline Amount of time to wait + * + * Returns: 0 Success + * !0 Not success + * + */ kern_return_t mach_wait_until_trap( struct mach_wait_until_trap_args *args) @@ -660,7 +1439,8 @@ mach_wait_until_trap( uint64_t deadline = args->deadline; wait_result_t wresult; - wresult = assert_wait_deadline((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, deadline); + wresult = assert_wait_deadline_with_leeway((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, deadline, 0); if (wresult == THREAD_WAITING) wresult = thread_block(mach_wait_until_continue); @@ -676,12 +1456,50 @@ clock_delay_until( if (now >= deadline) return; - if ( (deadline - now) < (8 * sched_cswtime) || + _clock_delay_until_deadline(deadline - now, deadline); +} + +/* + * Preserve the original precise interval that the client + * requested for comparison to the spin threshold. + */ +void +_clock_delay_until_deadline( + uint64_t interval, + uint64_t deadline) +{ + _clock_delay_until_deadline_with_leeway(interval, deadline, 0); +} + +/* + * Like _clock_delay_until_deadline, but it accepts a + * leeway value. + */ +void +_clock_delay_until_deadline_with_leeway( + uint64_t interval, + uint64_t deadline, + uint64_t leeway) +{ + + if (interval == 0) + return; + + if ( ml_delay_should_spin(interval) || get_preemption_level() != 0 || - ml_get_interrupts_enabled() == FALSE ) - machine_delay_until(deadline); - else { - assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline - sched_cswtime); + ml_get_interrupts_enabled() == FALSE ) { + machine_delay_until(interval, deadline); + } else { + /* + * For now, assume a leeway request of 0 means the client does not want a leeway + * value. We may want to change this interpretation in the future. 
+ */ + + if (leeway) { + assert_wait_deadline_with_leeway((event_t)clock_delay_until, THREAD_UNINT, TIMEOUT_URGENCY_LEEWAY, deadline, leeway); + } else { + assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline); + } thread_block(THREAD_CONTINUE_NULL); } @@ -692,11 +1510,26 @@ delay_for_interval( uint32_t interval, uint32_t scale_factor) { - uint64_t end; + uint64_t abstime; + + clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); + + _clock_delay_until_deadline(abstime, mach_absolute_time() + abstime); +} + +void +delay_for_interval_with_leeway( + uint32_t interval, + uint32_t leeway, + uint32_t scale_factor) +{ + uint64_t abstime_interval; + uint64_t abstime_leeway; - clock_interval_to_deadline(interval, scale_factor, &end); + clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime_interval); + clock_interval_to_absolutetime_interval(leeway, scale_factor, &abstime_leeway); - clock_delay_until(end); + _clock_delay_until_deadline_with_leeway(abstime_interval, mach_absolute_time() + abstime_interval, abstime_leeway); } void @@ -730,6 +1563,14 @@ clock_absolutetime_interval_to_deadline( *result = mach_absolute_time() + abstime; } +void +clock_continuoustime_interval_to_deadline( + uint64_t conttime, + uint64_t *result) +{ + *result = mach_continuous_time() + conttime; +} + void clock_get_uptime( uint64_t *result) @@ -755,3 +1596,145 @@ clock_deadline_for_periodic_event( *deadline = abstime + interval; } } + +uint64_t +mach_continuous_time(void) +{ + while(1) { + uint64_t read1 = mach_absolutetime_asleep; + uint64_t absolute = mach_absolute_time(); + OSMemoryBarrier(); + uint64_t read2 = mach_absolutetime_asleep; + + if(__builtin_expect(read1 == read2, 1)) { + return absolute + read1; + } + } +} + +uint64_t +mach_continuous_approximate_time(void) +{ + while(1) { + uint64_t read1 = mach_absolutetime_asleep; + uint64_t absolute = mach_approximate_time(); + OSMemoryBarrier(); + uint64_t read2 = 
mach_absolutetime_asleep; + + if(__builtin_expect(read1 == read2, 1)) { + return absolute + read1; + } + } +} + +/* + * continuoustime_to_absolutetime + * Must be called with interrupts disabled + * Returned value is only valid until the next update to + * mach_continuous_time + */ +uint64_t +continuoustime_to_absolutetime(uint64_t conttime) { + if (conttime <= mach_absolutetime_asleep) + return 0; + else + return conttime - mach_absolutetime_asleep; +} + +/* + * absolutetime_to_continuoustime + * Must be called with interrupts disabled + * Returned value is only valid until the next update to + * mach_continuous_time + */ +uint64_t +absolutetime_to_continuoustime(uint64_t abstime) { + return abstime + mach_absolutetime_asleep; +} + +#if CONFIG_DTRACE + +/* + * clock_get_calendar_nanotime_nowait + * + * Description: Non-blocking version of clock_get_calendar_nanotime() + * + * Notes: This function operates by separately tracking calendar time + * updates using a two element structure to copy the calendar + * state, which may be asynchronously modified. It utilizes + * barrier instructions in the tracking process and in the local + * stable snapshot process in order to ensure that a consistent + * snapshot is used to perform the calculation. + */ +void +clock_get_calendar_nanotime_nowait( + clock_sec_t *secs, + clock_nsec_t *nanosecs) +{ + int i = 0; + uint64_t now; + struct unlocked_clock_calend stable; + struct bintime bt; + + for (;;) { + stable = flipflop[i]; /* take snapshot */ + + /* + * Use a barrier instruction to ensure atomicity. We AND + * off the "in progress" bit to get the current generation + * count. + */ + (void)hw_atomic_and(&stable.gen, ~(uint32_t)1); + + /* + * If an update _is_ in progress, the generation count will be + * off by one, if it _was_ in progress, it will be off by two, + * and if we caught it at a good time, it will be equal (and + * our snapshot is therefore stable). 
+ */ + if (flipflop[i].gen == stable.gen) + break; + + /* Switch to the other element of the flipflop, and try again. */ + i ^= 1; + } + + now = mach_absolute_time(); + + bt = get_scaled_time(now); + + bintime_add(&bt, &clock_calend.bintime); + + bintime2nsclock(&bt, secs, nanosecs); +} + +static void +clock_track_calend_nowait(void) +{ + int i; + + for (i = 0; i < 2; i++) { + struct clock_calend tmp = clock_calend; + + /* + * Set the low bit of the generation count; since we use a + * barrier instruction to do this, we are guaranteed that this + * will flag an update in progress to an async caller trying + * to examine the contents. + */ + (void)hw_atomic_or(&flipflop[i].gen, 1); + + flipflop[i].calend = tmp; + + /* + * Increment the generation count to clear the low bit to + * signal completion. If a caller compares the generation + * count after taking a copy while in progress, the count + * will be off by two. + */ + (void)hw_atomic_add(&flipflop[i].gen, 1); + } +} + +#endif /* CONFIG_DTRACE */ +