X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5d5c5d0d5b79ade9a973d55186ffda2638ba2b6e..d26ffc64f583ab2d29df48f13518685602bc8832:/osfmk/kern/clock.c

diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c
index a3266197e..9bd9f3b0e 100644
--- a/osfmk/kern/clock.c
+++ b/osfmk/kern/clock.c
@@ -1,101 +1,323 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * This file contains Original Code and/or Modifications of Original Code 
- * as defined in and that are subject to the Apple Public Source License 
- * Version 2.0 (the 'License'). You may not use this file except in 
- * compliance with the License.  The rights granted to you under the 
- * License may not be used to create, or enable the creation or 
- * redistribution of, unlawful or unlicensed copies of an Apple operating 
- * system, or to circumvent, violate, or enable the circumvention or 
- * violation of, any terms of an Apple operating system software license 
- * agreement.
- *
- * Please obtain a copy of the License at 
- * http://www.opensource.apple.com/apsl/ and read it before using this 
- * file.
- *
- * The Original Code and all software distributed under the License are 
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
  * limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
 /*
  */
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)time.h	8.5 (Berkeley) 5/4/95
+ * $FreeBSD$
+ */
 
 #include <mach/mach_types.h>
 
-#include <kern/lock.h>
 #include <kern/spl.h>
 #include <kern/sched_prim.h>
 #include <kern/thread.h>
 #include <kern/clock.h>
 #include <kern/host_notify.h>
+#include <kern/thread_call.h>
+#include <libkern/OSAtomic.h>
 
 #include <IOKit/IOPlatformExpert.h>
 
 #include <machine/commpage.h>
+#include <machine/config.h>
+#include <machine/machine_routines.h>
 
 #include <mach/mach_traps.h>
 #include <mach/mach_time.h>
 
-decl_simple_lock_data(static,clock_lock)
+#include <sys/kdebug.h>
+#include <sys/timex.h>
+#include <kern/arithmetic_128.h>
+#include <os/log.h>
+
+uint32_t	hz_tick_interval = 1;
+static uint64_t has_monotonic_clock = 0;
+
+decl_simple_lock_data(,clock_lock)
+lck_grp_attr_t * settime_lock_grp_attr;
+lck_grp_t * settime_lock_grp;
+lck_attr_t * settime_lock_attr;
+lck_mtx_t settime_lock;
+
+#define clock_lock()	\
+	simple_lock(&clock_lock)
+
+#define clock_unlock()	\
+	simple_unlock(&clock_lock)
+
+#define clock_lock_init()	\
+	simple_lock_init(&clock_lock, 0)
+
+#ifdef kdp_simple_lock_is_acquired
+boolean_t kdp_clock_is_locked()
+{
+	return kdp_simple_lock_is_acquired(&clock_lock);
+}
+#endif
+
+struct bintime {
+	time_t	sec;
+	uint64_t frac;
+};
+
+static __inline void
+bintime_addx(struct bintime *_bt, uint64_t _x)
+{
+	uint64_t _u;
+
+	_u = _bt->frac;
+	_bt->frac += _x;
+	if (_u > _bt->frac)
+		_bt->sec++;
+}
+
+static __inline void
+bintime_subx(struct bintime *_bt, uint64_t _x)
+{
+	uint64_t _u;
+
+	_u = _bt->frac;
+	_bt->frac -= _x;
+	if (_u < _bt->frac)
+		_bt->sec--;
+}
+
+static __inline void
+bintime_addns(struct bintime *bt, uint64_t ns)
+{
+	bt->sec += ns/ (uint64_t)NSEC_PER_SEC;
+	ns = ns % (uint64_t)NSEC_PER_SEC;
+	if (ns) {
+		/* 18446744073 = int(2^64 / NSEC_PER_SEC) */
+		ns = ns * (uint64_t)18446744073LL;
+		bintime_addx(bt, ns);
+	}
+}
+
+static __inline void
+bintime_subns(struct bintime *bt, uint64_t ns)
+{
+	bt->sec -= ns/ (uint64_t)NSEC_PER_SEC;
+	ns = ns % (uint64_t)NSEC_PER_SEC;
+	if (ns) {
+		/* 18446744073 = int(2^64 / NSEC_PER_SEC) */
+		ns = ns * (uint64_t)18446744073LL;
+		bintime_subx(bt, ns);
+	}
+}
+
+static __inline void
+bintime_addxns(struct bintime *bt, uint64_t a, int64_t xns)
+{
+	uint64_t uxns = (xns > 0)?(uint64_t )xns:(uint64_t)-xns;
+	uint64_t ns = multi_overflow(a, uxns);
+	if (xns > 0) {
+		if (ns)
+			bintime_addns(bt, ns);
+		ns = (a * uxns) / (uint64_t)NSEC_PER_SEC;
+		bintime_addx(bt, ns);
+	}
+	else{
+		if (ns)
+			bintime_subns(bt, ns);
+		ns = (a * uxns) / (uint64_t)NSEC_PER_SEC;
+		bintime_subx(bt,ns);
+	}
+}
+
+
+static __inline void
+bintime_add(struct bintime *_bt, const struct bintime *_bt2)
+{
+	uint64_t _u;
+
+	_u = _bt->frac;
+	_bt->frac += _bt2->frac;
+	if (_u > _bt->frac)
+		_bt->sec++;
+	_bt->sec += _bt2->sec;
+}
+
+static __inline void
+bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
+{
+	uint64_t _u;
+
+	_u = _bt->frac;
+	_bt->frac -= _bt2->frac;
+	if (_u < _bt->frac)
+		_bt->sec--;
+	_bt->sec -= _bt2->sec;
+}
+
+static __inline void
+clock2bintime(const clock_sec_t *secs, const clock_usec_t *microsecs, struct bintime *_bt)
+{
+
+	_bt->sec = *secs;
+	/* 18446744073709 = int(2^64 / 1000000) */
+	_bt->frac = *microsecs * (uint64_t)18446744073709LL;
+}
+
+static __inline void
+bintime2usclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *microsecs)
+{
+
+	*secs = _bt->sec;
+	*microsecs = ((uint64_t)USEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;
+}
+
+static __inline void
+bintime2nsclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *nanosecs)
+{
+
+	*secs = _bt->sec;
+	*nanosecs = ((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;
+}
+
+static __inline void
+bintime2absolutetime(const struct bintime *_bt, uint64_t *abs)
+{
+	uint64_t nsec;
+	nsec = (uint64_t) _bt->sec * (uint64_t)NSEC_PER_SEC + (((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32);
+	nanoseconds_to_absolutetime(nsec, abs);
+}
+
+struct latched_time {
+        uint64_t monotonic_time_usec;
+        uint64_t mach_time;
+};
+
+extern int
+kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
 /*
  *	Time of day (calendar) variables.
  *
  *	Algorithm:
  *
- *	TOD <- (seconds + epoch, fraction) <- CONV(current absolute time + offset)
+ *	TOD <- bintime + delta*scale
  *
- *	where CONV converts absolute time units into seconds and a fraction.
+ *	where :
+ * 	bintime is a cumulative offset that includes bootime and scaled time elapsed betweed bootime and last scale update.
+ *	delta is ticks elapsed since last scale update.
+ *	scale is computed according to an adjustment provided by ntp_kern.
  */
 static struct clock_calend {
-	uint64_t			epoch;
-	uint64_t			offset;
-}					clock_calend;
+	uint64_t		s_scale_ns; /* scale to apply for each second elapsed, it converts in ns */
+	int64_t			s_adj_nsx; /* additional adj to apply for each second elapsed, it is expressed in 64 bit frac of ns */
+	uint64_t		tick_scale_x; /* scale to apply for each tick elapsed, it converts in 64 bit frac of s */
+	uint64_t 		offset_count; /* abs time from which apply current scales */
+	struct bintime		offset; /* cumulative offset expressed in (sec, 64 bits frac of a second) */
+	struct bintime		bintime; /* cumulative offset (it includes bootime) expressed in (sec, 64 bits frac of a second) */
+	struct bintime		boottime; /* boot time expressed in (sec, 64 bits frac of a second) */
+	struct bintime		basesleep;
+} clock_calend;
+
+static uint64_t ticks_per_sec; /* ticks in a second (expressed in abs time) */
+
+#if DEVELOPMENT || DEBUG
+clock_sec_t last_utc_sec = 0;
+clock_usec_t last_utc_usec = 0;
+clock_sec_t max_utc_sec = 0;
+clock_sec_t last_sys_sec = 0;
+clock_usec_t last_sys_usec = 0;
+#endif
+
+#if DEVELOPMENT || DEBUG
+extern int g_should_log_clock_adjustments;
+
+static void print_all_clock_variables(const char*, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* calend_cp);
+static void print_all_clock_variables_internal(const char *, struct clock_calend* calend_cp);
+#else
+#define print_all_clock_variables(...) do { } while (0)
+#define print_all_clock_variables_internal(...) do { } while (0)
+#endif
+
+#if	CONFIG_DTRACE
+
 
 /*
- *	Calendar adjustment variables and values.
+ *	Unlocked calendar flipflop; this is used to track a clock_calend such
+ *	that we can safely access a snapshot of a valid  clock_calend structure
+ *	without needing to take any locks to do it.
+ *
+ *	The trick is to use a generation count and set the low bit when it is
+ *	being updated/read; by doing this, we guarantee, through use of the
+ *	hw_atomic functions, that the generation is incremented when the bit
+ *	is cleared atomically (by using a 1 bit add).
  */
-#define calend_adjperiod	(NSEC_PER_SEC / 100)	/* adjustment period, ns */
-#define calend_adjskew		(40 * NSEC_PER_USEC)	/* "standard" skew, ns / period */
-#define	calend_adjbig		(NSEC_PER_SEC)			/* use 10x skew above adjbig ns */
-
-static uint64_t			calend_adjstart;		/* Absolute time value for start of this adjustment period */
-static uint32_t			calend_adjoffset;		/* Absolute time offset for this adjustment period as absolute value */
-
-static int32_t			calend_adjdelta;		/* Nanosecond time delta for this adjustment period */
-static int64_t			calend_adjtotal;		/* Nanosecond remaining total adjustment */
-
-static uint64_t			calend_adjdeadline;		/* Absolute time value for next adjustment period */
-static uint32_t			calend_adjinterval;		/* Absolute time interval of adjustment period */
-
-static timer_call_data_t	calend_adjcall;
-static uint32_t				calend_adjactive;
+static struct unlocked_clock_calend {
+	struct clock_calend	calend;		/* copy of calendar */
+	uint32_t		gen;		/* generation count */
+} flipflop[ 2];
 
-static uint32_t		calend_set_adjustment(
-						int32_t			*secs,
-						int32_t			*microsecs);
+static void clock_track_calend_nowait(void);
 
-static void			calend_adjust_call(void);
-static uint32_t		calend_adjust(void);
+#endif
 
-static thread_call_data_t	calend_wakecall;
+void _clock_delay_until_deadline(uint64_t interval, uint64_t deadline);
+void _clock_delay_until_deadline_with_leeway(uint64_t interval, uint64_t deadline, uint64_t leeway);
 
-extern	void	IOKitResetTime(void);
-
-static uint64_t		clock_boottime;				/* Seconds boottime epoch */
+/* Boottime variables*/
+static uint64_t clock_boottime;
+static uint32_t clock_boottime_usec;
 
 #define TIME_ADD(rsecs, secs, rfrac, frac, unit)	\
 MACRO_BEGIN											\
@@ -108,7 +330,7 @@ MACRO_END
 
 #define TIME_SUB(rsecs, secs, rfrac, frac, unit)	\
 MACRO_BEGIN											\
-	if ((int32_t)((rfrac) -= (frac)) < 0) {			\
+	if ((int)((rfrac) -= (frac)) < 0) {				\
 		(rfrac) += (unit);							\
 		(rsecs) -= 1;								\
 	}												\
@@ -123,17 +345,19 @@ MACRO_END
 void
 clock_config(void)
 {
-	simple_lock_init(&clock_lock, 0);
 
-	timer_call_setup(&calend_adjcall, (timer_call_func_t)calend_adjust_call, NULL);
-	thread_call_setup(&calend_wakecall, (thread_call_func_t)IOKitResetTime, NULL);
+	clock_lock_init();
+
+	settime_lock_grp_attr = lck_grp_attr_alloc_init();
+	settime_lock_grp = lck_grp_alloc_init("settime grp", settime_lock_grp_attr);
+	settime_lock_attr = lck_attr_alloc_init();
+	lck_mtx_init(&settime_lock, settime_lock_grp, settime_lock_attr);
 
 	clock_oldconfig();
 
-	/*
-	 * Initialize the timer callouts.
-	 */
-	timer_call_initialize();
+	ntp_init();
+
+	nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec);
 }
 
 /*
@@ -160,8 +384,8 @@ clock_timebase_init(void)
 {
 	uint64_t	abstime;
 
-	nanoseconds_to_absolutetime(calend_adjperiod, &abstime);
-	calend_adjinterval = abstime;
+	nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &abstime);
+	hz_tick_interval = (uint32_t)abstime;
 
 	sched_timebase_init();
 }
@@ -176,7 +400,7 @@ mach_timebase_info_trap(
 	struct mach_timebase_info_trap_args *args)
 {
 	mach_vm_address_t 			out_info_addr = args->info;
-	mach_timebase_info_data_t	info;
+	mach_timebase_info_data_t	info = {};
 
 	clock_timebase_info(&info);
 
@@ -197,37 +421,213 @@ mach_timebase_info_trap(
  */
 void
 clock_get_calendar_microtime(
-	uint32_t			*secs,
-	uint32_t			*microsecs)
+	clock_sec_t		*secs,
+	clock_usec_t		*microsecs)
 {
-	uint64_t		now;
-	spl_t			s;
+	clock_get_calendar_absolute_and_microtime(secs, microsecs, NULL);
+}
 
-	s = splclock();
-	simple_lock(&clock_lock);
+/*
+ * get_scale_factors_from_adj:
+ *
+ * computes scale factors from the value given in adjustment.
+ *
+ * Part of the code has been taken from tc_windup of FreeBSD
+ * written by Poul-Henning Kamp <phk@FreeBSD.ORG>, Julien Ridoux and
+ * Konstantin Belousov.
+ * https://github.com/freebsd/freebsd/blob/master/sys/kern/kern_tc.c
+ */
+static void
+get_scale_factors_from_adj(int64_t adjustment, uint64_t* tick_scale_x, uint64_t* s_scale_ns, int64_t* s_adj_nsx)
+{
+	uint64_t scale;
+	int64_t nano, frac;
+
+	/*-
+	 * Calculating the scaling factor.  We want the number of 1/2^64
+	 * fractions of a second per period of the hardware counter, taking
+	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
+	 * processing provides us with.
+	 *
+	 * The th_adjustment is nanoseconds per second with 32 bit binary
+	 * fraction and we want 64 bit binary fraction of second:
+	 *
+	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
+	 *
+	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
+	 * we can only multiply by about 850 without overflowing, that
+	 * leaves no suitably precise fractions for multiply before divide.
+	 *
+	 * Divide before multiply with a fraction of 2199/512 results in a
+	 * systematic undercompensation of 10PPM of th_adjustment.  On a
+	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
+	 *
+	 * We happily sacrifice the lowest of the 64 bits of our result
+	 * to the goddess of code clarity.
+	 *
+	 */
+	scale = (uint64_t)1 << 63;
+	scale += (adjustment / 1024) * 2199;
+	scale /= ticks_per_sec;
+	*tick_scale_x = scale * 2;
 
-	now = mach_absolute_time();
+	/*
+	 * hi part of adj
+	 * it contains ns (without fraction) to add to the next sec.
+	 * Get ns scale factor for the next sec.
+	 */
+	nano = (adjustment > 0)? adjustment >> 32 : -((-adjustment) >> 32);
+	scale = (uint64_t) NSEC_PER_SEC;
+	scale += nano;
+	*s_scale_ns = scale;
+
+	/*
+	 * lo part of adj
+	 * it contains 32 bit frac of ns to add to the next sec.
+	 * Keep it as additional adjustment for the next sec.
+	 */
+	frac = (adjustment > 0)? ((uint32_t) adjustment) : -((uint32_t) (-adjustment));
+	*s_adj_nsx = (frac>0)? frac << 32 : -( (-frac) << 32);
+
+	return;
+}
 
-	if (calend_adjdelta < 0) {
-		uint32_t	t32;
+/*
+ * scale_delta:
+ *
+ * returns a bintime struct representing delta scaled accordingly to the
+ * scale factors provided to this function.
+ */
+static struct bintime
+scale_delta(uint64_t delta, uint64_t tick_scale_x, uint64_t s_scale_ns, int64_t s_adj_nsx)
+{
+	uint64_t sec, new_ns, over;
+	struct bintime bt;
 
-		if (now > calend_adjstart) {
-			t32 = now - calend_adjstart;
+	bt.sec = 0;
+	bt.frac = 0;
 
-			if (t32 > calend_adjoffset)
-				now -= calend_adjoffset;
-			else
-				now = calend_adjstart;
+	/*
+	 * If more than one second is elapsed,
+	 * scale fully elapsed seconds using scale factors for seconds.
+	 * s_scale_ns -> scales sec to ns.
+	 * s_adj_nsx -> additional adj expressed in 64 bit frac of ns to apply to each sec.
+	 */
+	if (delta > ticks_per_sec) {
+		sec = (delta/ticks_per_sec);
+		new_ns = sec * s_scale_ns;
+		bintime_addns(&bt, new_ns);
+		if (s_adj_nsx) {
+			if (sec == 1) {
+				/* shortcut, no overflow can occur */
+				if (s_adj_nsx > 0)
+					bintime_addx(&bt, (uint64_t)s_adj_nsx/ (uint64_t)NSEC_PER_SEC);
+				else
+					bintime_subx(&bt, (uint64_t)-s_adj_nsx/ (uint64_t)NSEC_PER_SEC);
+			}
+			else{
+				/*
+				 * s_adj_nsx is 64 bit frac of ns.
+				 * sec*s_adj_nsx might overflow in int64_t.
+				 * use bintime_addxns to not lose overflowed ns.
+				 */
+				bintime_addxns(&bt, sec, s_adj_nsx);
+			}
 		}
+		delta = (delta % ticks_per_sec);
+        }
+
+	over = multi_overflow(tick_scale_x, delta);
+	if(over){
+		bt.sec += over;
 	}
 
-	now += clock_calend.offset;
+	/*
+	 * scale elapsed ticks using the scale factor for ticks.
+	 */
+	bintime_addx(&bt, delta * tick_scale_x);
+
+	return bt;
+}
+
+/*
+ * get_scaled_time:
+ *
+ * returns the scaled time of the time elapsed from the last time
+ * scale factors were updated to now.
+ */
+static struct bintime
+get_scaled_time(uint64_t now)
+{
+	uint64_t delta;
+
+	/*
+	 * Compute ticks elapsed since last scale update.
+	 * This time will be scaled according to the value given by ntp kern.
+	 */
+	delta = now - clock_calend.offset_count;
+
+	return scale_delta(delta, clock_calend.tick_scale_x, clock_calend.s_scale_ns, clock_calend.s_adj_nsx);
+}
+
+static void
+clock_get_calendar_absolute_and_microtime_locked(
+	clock_sec_t		*secs,
+	clock_usec_t		*microsecs,
+	uint64_t    		*abstime)
+{
+	uint64_t now;
+	struct bintime bt;
+
+	now  = mach_absolute_time();
+	if (abstime)
+		*abstime = now;
+
+	bt = get_scaled_time(now);
+	bintime_add(&bt, &clock_calend.bintime);
+	bintime2usclock(&bt, secs, microsecs);
+}
+
+static void
+clock_get_calendar_absolute_and_nanotime_locked(
+	clock_sec_t		*secs,
+	clock_usec_t		*nanosecs,
+	uint64_t    		*abstime)
+{
+	uint64_t now;
+	struct bintime bt;
+
+	now  = mach_absolute_time();
+	if (abstime)
+		*abstime = now;
+
+	bt = get_scaled_time(now);
+	bintime_add(&bt, &clock_calend.bintime);
+	bintime2nsclock(&bt, secs, nanosecs);
+}
+
+/*
+ *	clock_get_calendar_absolute_and_microtime:
+ *
+ *	Returns the current calendar value,
+ *	microseconds as the fraction. Also
+ *	returns mach_absolute_time if abstime
+ *	is not NULL.
+ */
+void
+clock_get_calendar_absolute_and_microtime(
+	clock_sec_t		*secs,
+	clock_usec_t		*microsecs,
+	uint64_t    		*abstime)
+{
+	spl_t			s;
 
-	absolutetime_to_microtime(now, secs, microsecs);
+	s = splclock();
+	clock_lock();
 
-	*secs += clock_calend.epoch;
+	clock_get_calendar_absolute_and_microtime_locked(secs, microsecs, abstime);
 
-	simple_unlock(&clock_lock);
+	clock_unlock();
 	splx(s);
 }
 
@@ -243,38 +643,17 @@ clock_get_calendar_microtime(
  */
 void
 clock_get_calendar_nanotime(
-	uint32_t			*secs,
-	uint32_t			*nanosecs)
+	clock_sec_t		*secs,
+	clock_nsec_t		*nanosecs)
 {
-	uint64_t		now;
 	spl_t			s;
 
 	s = splclock();
-	simple_lock(&clock_lock);
-
-	now = mach_absolute_time();
-
-	if (calend_adjdelta < 0) {
-		uint32_t	t32;
+	clock_lock();
 
-		if (now > calend_adjstart) {
-			t32 = now - calend_adjstart;
+	clock_get_calendar_absolute_and_nanotime_locked(secs, nanosecs, NULL);
 
-			if (t32 > calend_adjoffset)
-				now -= calend_adjoffset;
-			else
-				now = calend_adjstart;
-		}
-	}
-
-	now += clock_calend.offset;
-
-	absolutetime_to_microtime(now, secs, nanosecs);
-	*nanosecs *= NSEC_PER_USEC;
-
-	*secs += clock_calend.epoch;
-
-	simple_unlock(&clock_lock);
+	clock_unlock();
 	splx(s);
 }
 
@@ -291,41 +670,56 @@ clock_get_calendar_nanotime(
  */
 void
 clock_gettimeofday(
-	uint32_t			*secs,
-	uint32_t			*microsecs)
+	clock_sec_t	*secs,
+	clock_usec_t	*microsecs)
+{
+	clock_gettimeofday_and_absolute_time(secs, microsecs, NULL);
+}
+
+void
+clock_gettimeofday_and_absolute_time(
+	clock_sec_t	*secs,
+	clock_usec_t	*microsecs,
+	uint64_t	*mach_time)
 {
 	uint64_t		now;
 	spl_t			s;
+	struct bintime 	bt;
 
 	s = splclock();
-	simple_lock(&clock_lock);
+	clock_lock();
 
 	now = mach_absolute_time();
+	bt = get_scaled_time(now);
+	bintime_add(&bt, &clock_calend.bintime);
+	bintime2usclock(&bt, secs, microsecs);
 
-	if (calend_adjdelta >= 0) {
-		clock_gettimeofday_set_commpage(now, clock_calend.epoch, clock_calend.offset, secs, microsecs);
-	}
-	else {
-		uint32_t	t32;
-
-		if (now > calend_adjstart) {
-			t32 = now - calend_adjstart;
-
-			if (t32 > calend_adjoffset)
-				now -= calend_adjoffset;
-			else
-				now = calend_adjstart;
-		}
-
-		now += clock_calend.offset;
+	clock_gettimeofday_set_commpage(now, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec);
 
-		absolutetime_to_microtime(now, secs, microsecs);
+	clock_unlock();
+	splx(s);
 
-		*secs += clock_calend.epoch;
+	if (mach_time) {
+		*mach_time = now;
 	}
+}
 
-	simple_unlock(&clock_lock);
-	splx(s);
+static void
+update_basesleep(struct bintime delta, bool forward)
+{
+	/*
+	 * Update basesleep only if the platform does not have monotonic clock.
+	 * In that case the sleep time computation will use the PMU time
+	 * which offset gets modified by settimeofday.
+	 * We don't need this for mononic clock because in that case the sleep
+	 * time computation is independent from the offset value of the PMU.
+	 */
+	if (!has_monotonic_clock) {
+		if (forward)
+			bintime_add(&clock_calend.basesleep, &delta);
+		else
+			bintime_sub(&clock_calend.basesleep, &delta);
+	}
 }
 
 /*
@@ -342,305 +736,681 @@ clock_gettimeofday(
  */
 void
 clock_set_calendar_microtime(
-	uint32_t			secs,
-	uint32_t			microsecs)
+	clock_sec_t		secs,
+	clock_usec_t		microsecs)
 {
-	uint32_t		sys, microsys;
-	uint32_t		newsecs;
+	uint64_t		absolutesys;
+	clock_sec_t		newsecs;
+	clock_sec_t		oldsecs;
+	clock_usec_t        	newmicrosecs;
+	clock_usec_t		oldmicrosecs;
+	uint64_t		commpage_value;
 	spl_t			s;
+	struct bintime		bt;
+	clock_sec_t		deltasecs;
+	clock_usec_t		deltamicrosecs;
 
-	newsecs = (microsecs < 500*USEC_PER_SEC)?
-						secs: secs + 1;
-
-	s = splclock();
-	simple_lock(&clock_lock);
-
-    commpage_set_timestamp(0,0,0);
+	newsecs = secs;
+	newmicrosecs = microsecs;
 
 	/*
-	 *	Calculate the new calendar epoch based on
-	 *	the new value and the system clock.
+	 * settime_lock mtx is used to avoid that racing settimeofdays update the wall clock and
+	 * the platform clock concurrently.
+	 *
+	 * clock_lock cannot be used for this race because it is acquired from interrupt context
+	 * and it needs interrupts disabled while instead updating the platform clock needs to be
+	 * called with interrupts enabled.
 	 */
-	clock_get_system_microtime(&sys, &microsys);
-	TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC);
+	lck_mtx_lock(&settime_lock);
+
+	s = splclock();
+	clock_lock();
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp = clock_calend;
+#endif
+	commpage_disable_timestamp();
 
 	/*
 	 *	Adjust the boottime based on the delta.
 	 */
-	clock_boottime += secs - clock_calend.epoch;
+	clock_get_calendar_absolute_and_microtime_locked(&oldsecs, &oldmicrosecs, &absolutesys);
+
+#if DEVELOPMENT || DEBUG
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n",
+		       __func__, (unsigned long)oldsecs, oldmicrosecs, absolutesys);
+		os_log(OS_LOG_DEFAULT, "%s requested %lu s %d u\n",
+		       __func__,  (unsigned long)secs, microsecs );
+	}
+#endif
 
-	/*
-	 *	Set the new calendar epoch.
-	 */
-	clock_calend.epoch = secs;
-	nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
+	if (oldsecs < secs || (oldsecs == secs && oldmicrosecs < microsecs)) {
+		// moving forwards
+		deltasecs = secs;
+		deltamicrosecs = microsecs;
 
-	/*
-	 *	Cancel any adjustment in progress.
-	 */
-	calend_adjdelta = calend_adjtotal = 0;
+		TIME_SUB(deltasecs, oldsecs, deltamicrosecs, oldmicrosecs, USEC_PER_SEC);
+
+#if DEVELOPMENT || DEBUG
+		if (g_should_log_clock_adjustments) {
+			os_log(OS_LOG_DEFAULT, "%s delta requested %lu s %d u\n",
+			       __func__, (unsigned long)deltasecs, deltamicrosecs);
+		}
+#endif
+
+		TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+		clock2bintime(&deltasecs, &deltamicrosecs, &bt);
+		bintime_add(&clock_calend.boottime, &bt);
+		update_basesleep(bt, TRUE);
+	} else {
+		// moving backwards
+		deltasecs = oldsecs;
+		deltamicrosecs = oldmicrosecs;
+
+		TIME_SUB(deltasecs, secs, deltamicrosecs, microsecs, USEC_PER_SEC);
+#if DEVELOPMENT || DEBUG
+		if (g_should_log_clock_adjustments) {
+			os_log(OS_LOG_DEFAULT, "%s negative delta requested %lu s %d u\n",
+			       __func__, (unsigned long)deltasecs, deltamicrosecs);
+		}
+#endif
+
+		TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+		clock2bintime(&deltasecs, &deltamicrosecs, &bt);
+		bintime_sub(&clock_calend.boottime, &bt);
+		update_basesleep(bt, FALSE);
+	}
+
+	clock_calend.bintime = clock_calend.boottime;
+	bintime_add(&clock_calend.bintime, &clock_calend.offset);
 
-	simple_unlock(&clock_lock);
+	clock2bintime((clock_sec_t *) &secs, (clock_usec_t *) &microsecs, &bt);
+
+	clock_gettimeofday_set_commpage(absolutesys, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec);
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp1 = clock_calend;
+#endif
+
+	commpage_value = clock_boottime * USEC_PER_SEC + clock_boottime_usec;
+
+	clock_unlock();
+	splx(s);
 
 	/*
 	 *	Set the new value for the platform clock.
+	 *	This call might block, so interrupts must be enabled.
 	 */
-	PESetGMTTimeOfDay(newsecs);
+#if DEVELOPMENT || DEBUG
+	uint64_t now_b = mach_absolute_time();
+#endif
 
-	splx(s);
+	PESetUTCTimeOfDay(newsecs, newmicrosecs);
+
+#if DEVELOPMENT || DEBUG
+	uint64_t now_a = mach_absolute_time();
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "%s mach bef PESet %llu mach aft %llu \n", __func__, now_b, now_a);
+	}
+#endif
+
+	print_all_clock_variables_internal(__func__, &clock_calend_cp);
+	print_all_clock_variables_internal(__func__, &clock_calend_cp1);
+
+	commpage_update_boottime(commpage_value);
 
 	/*
 	 *	Send host notifications.
 	 */
 	host_notify_calendar_change();
+	host_notify_calendar_set();
+
+#if CONFIG_DTRACE
+	clock_track_calend_nowait();
+#endif
+
+	lck_mtx_unlock(&settime_lock);
 }
 
-/*
- *	clock_initialize_calendar:
- *
- *	Set the calendar and related clocks
- *	from the platform clock at boot or
- *	wake event.
- *
- *	Also sends host notifications.
- */
+uint64_t mach_absolutetime_asleep = 0;
+uint64_t mach_absolutetime_last_sleep = 0;
+
 void
-clock_initialize_calendar(void)
+clock_get_calendar_uptime(clock_sec_t *secs)
 {
-	uint32_t		sys, microsys;
-	uint32_t		microsecs = 0, secs = PEGetGMTTimeOfDay();
-	spl_t			s;
+	uint64_t now;
+	spl_t s;
+	struct bintime bt;
 
 	s = splclock();
-	simple_lock(&clock_lock);
-
-    commpage_set_timestamp(0,0,0);
+	clock_lock();
 
-	if ((int32_t)secs >= (int32_t)clock_boottime) {
-		/*
-		 *	Initialize the boot time based on the platform clock.
-		 */
-		if (clock_boottime == 0)
-			clock_boottime = secs;
-
-		/*
-		 *	Calculate the new calendar epoch based on
-		 *	the platform clock and the system clock.
-		 */
-		clock_get_system_microtime(&sys, &microsys);
-		TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC);
+	now = mach_absolute_time();
 
-		/*
-		 *	Set the new calendar epoch.
-		 */
-		clock_calend.epoch = secs;
-		nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
+	bt = get_scaled_time(now);
+	bintime_add(&bt, &clock_calend.offset);
 
-		/*
-		 *	 Cancel any adjustment in progress.
-		 */
-		calend_adjdelta = calend_adjtotal = 0;
-	}
+	*secs = bt.sec;
 
-	simple_unlock(&clock_lock);
+	clock_unlock();
 	splx(s);
-
-	/*
-	 *	Send host notifications.
-	 */
-	host_notify_calendar_change();
 }
 
-/*
- *	clock_get_boottime_nanotime:
- *
- *	Return the boottime, used by sysctl.
- */
-void
-clock_get_boottime_nanotime(
-	uint32_t			*secs,
-	uint32_t			*nanosecs)
-{
-	*secs = clock_boottime;
-	*nanosecs = 0;
-}
 
 /*
- *	clock_adjtime:
+ * clock_update_calendar:
  *
- *	Interface to adjtime() syscall.
- *
- *	Calculates adjustment variables and
- *	initiates adjustment.
+ * called by ntp timer to update scale factors.
  */
 void
-clock_adjtime(
-	int32_t		*secs,
-	int32_t		*microsecs)
+clock_update_calendar(void)
 {
-	uint32_t	interval;
-	spl_t		s;
+
+	uint64_t now, delta;
+	struct bintime bt;
+	spl_t s;
+	int64_t adjustment;
 
 	s = splclock();
-	simple_lock(&clock_lock);
+	clock_lock();
+
+	now  = mach_absolute_time();
+
+	/*
+	 * scale the time elapsed since the last update and
+	 * add it to offset.
+	 */
+	bt = get_scaled_time(now);
+	bintime_add(&clock_calend.offset, &bt);
+
+	/*
+	 * update the base from which apply next scale factors.
+	 */
+	delta = now - clock_calend.offset_count;
+	clock_calend.offset_count += delta;
 
-	interval = calend_set_adjustment(secs, microsecs);
-	if (interval != 0) {
-		calend_adjdeadline = mach_absolute_time() + interval;
-		if (!timer_call_enter(&calend_adjcall, calend_adjdeadline))
-			calend_adjactive++;
+	clock_calend.bintime = clock_calend.offset;
+	bintime_add(&clock_calend.bintime, &clock_calend.boottime);
+
+	/*
+	 * recompute next adjustment.
+	 */
+	ntp_update_second(&adjustment, clock_calend.bintime.sec);
+
+#if DEVELOPMENT || DEBUG
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "%s adjustment %lld\n", __func__, adjustment);
 	}
-	else
-	if (timer_call_cancel(&calend_adjcall))
-		calend_adjactive--;
+#endif
+	
+	/*
+	 * recomputing scale factors.
+	 */
+	get_scale_factors_from_adj(adjustment, &clock_calend.tick_scale_x, &clock_calend.s_scale_ns, &clock_calend.s_adj_nsx);
+
+	clock_gettimeofday_set_commpage(now, clock_calend.bintime.sec, clock_calend.bintime.frac, clock_calend.tick_scale_x, ticks_per_sec);
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend calend_cp = clock_calend;
+#endif
 
-	simple_unlock(&clock_lock);
+	clock_unlock();
 	splx(s);
+
+	print_all_clock_variables(__func__, NULL,NULL,NULL,NULL, &calend_cp);
 }
 
-static uint32_t
-calend_set_adjustment(
-	int32_t				*secs,
-	int32_t				*microsecs)
-{
-	uint64_t		now, t64;
-	int64_t			total, ototal;
-	uint32_t		interval = 0;
 
-	total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC;
+#if DEVELOPMENT || DEBUG
+
+void print_all_clock_variables_internal(const char* func, struct clock_calend* clock_calend_cp)
+{
+	clock_sec_t     offset_secs;
+	clock_usec_t    offset_microsecs;
+	clock_sec_t     bintime_secs;
+	clock_usec_t    bintime_microsecs;
+	clock_sec_t     bootime_secs;
+	clock_usec_t    bootime_microsecs;
+	
+	if (!g_should_log_clock_adjustments)
+                return;
+
+	bintime2usclock(&clock_calend_cp->offset, &offset_secs, &offset_microsecs);
+	bintime2usclock(&clock_calend_cp->bintime, &bintime_secs, &bintime_microsecs);
+	bintime2usclock(&clock_calend_cp->boottime, &bootime_secs, &bootime_microsecs);
+
+	os_log(OS_LOG_DEFAULT, "%s s_scale_ns %llu s_adj_nsx %lld tick_scale_x %llu offset_count %llu\n",
+	       func , clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx,
+	       clock_calend_cp->tick_scale_x, clock_calend_cp->offset_count);
+	os_log(OS_LOG_DEFAULT, "%s offset.sec %ld offset.frac %llu offset_secs %lu offset_microsecs %d\n",
+	       func, clock_calend_cp->offset.sec, clock_calend_cp->offset.frac,
+	       (unsigned long)offset_secs, offset_microsecs);
+	os_log(OS_LOG_DEFAULT, "%s bintime.sec %ld bintime.frac %llu bintime_secs %lu bintime_microsecs %d\n",
+	       func, clock_calend_cp->bintime.sec, clock_calend_cp->bintime.frac,
+	       (unsigned long)bintime_secs, bintime_microsecs);
+	os_log(OS_LOG_DEFAULT, "%s bootime.sec %ld bootime.frac %llu bootime_secs %lu bootime_microsecs %d\n",
+	       func, clock_calend_cp->boottime.sec, clock_calend_cp->boottime.frac,
+	       (unsigned long)bootime_secs, bootime_microsecs);
+
+	clock_sec_t     basesleep_secs;
+        clock_usec_t    basesleep_microsecs;
+	
+	bintime2usclock(&clock_calend_cp->basesleep, &basesleep_secs, &basesleep_microsecs);
+	os_log(OS_LOG_DEFAULT, "%s basesleep.sec %ld basesleep.frac %llu basesleep_secs %lu basesleep_microsecs %d\n",
+	       func, clock_calend_cp->basesleep.sec, clock_calend_cp->basesleep.frac,
+	       (unsigned long)basesleep_secs, basesleep_microsecs);
 
-    commpage_set_timestamp(0,0,0);
+}
 
-	now = mach_absolute_time();
 
-	ototal = calend_adjtotal;
+void print_all_clock_variables(const char* func, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* clock_calend_cp)
+{
+	if (!g_should_log_clock_adjustments)
+		return;
 
-	if (total != 0) {
-		int32_t		delta = calend_adjskew;
+	struct bintime  bt;
+	clock_sec_t     wall_secs;
+	clock_usec_t    wall_microsecs;
+	uint64_t now;
+	uint64_t delta;
 
-		if (total > 0) {
-			if (total > calend_adjbig)
-				delta *= 10;
-			if (delta > total)
-				delta = total;
+	if (pmu_secs) {
+		os_log(OS_LOG_DEFAULT, "%s PMU %lu s %d u \n", func, (unsigned long)*pmu_secs, *pmu_usec); 
+	}
+	if (sys_secs) {
+		os_log(OS_LOG_DEFAULT, "%s sys %lu s %d u \n", func, (unsigned long)*sys_secs, *sys_usec);
+	}
 
-			nanoseconds_to_absolutetime((uint64_t)delta, &t64);
-			calend_adjoffset = t64;
-		}
-		else {
-			if (total < -calend_adjbig)
-				delta *= 10;
-			delta = -delta;
-			if (delta < total)
-				delta = total;
+	print_all_clock_variables_internal(func, clock_calend_cp);
 
-			calend_adjstart = now;
+	now = mach_absolute_time();
+        delta = now - clock_calend_cp->offset_count;
 
-			nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
-			calend_adjoffset = t64;
-		}
+        bt = scale_delta(delta, clock_calend_cp->tick_scale_x, clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx);
+	bintime_add(&bt, &clock_calend_cp->bintime);
+	bintime2usclock(&bt, &wall_secs, &wall_microsecs);
 
-		calend_adjtotal = total;
-		calend_adjdelta = delta;
+	os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n",
+	       func, (unsigned long)wall_secs, wall_microsecs, now);
+}
 
-		interval = calend_adjinterval;
-	}
-	else
-		calend_adjdelta = calend_adjtotal = 0;
 
-	if (ototal != 0) {
-		*secs = ototal / NSEC_PER_SEC;
-		*microsecs = (ototal % NSEC_PER_SEC) / NSEC_PER_USEC;
-	}
-	else
-		*secs = *microsecs = 0;
+#endif /* DEVELOPMENT || DEBUG */
 
-	return (interval);
-}
 
-static void
-calend_adjust_call(void)
+/*
+ *	clock_initialize_calendar:
+ *
+ *	Set the calendar and related clocks
+ *	from the platform clock at boot.
+ *
+ *	Also sends host notifications.
+ */
+void
+clock_initialize_calendar(void)
 {
-	uint32_t	interval;
-	spl_t		s;
+	clock_sec_t		sys;  // sleepless time since boot in seconds
+	clock_sec_t		secs; // Current UTC time
+	clock_sec_t		utc_offset_secs; // Difference in current UTC time and sleepless time since boot
+	clock_usec_t		microsys;  
+	clock_usec_t		microsecs; 
+	clock_usec_t		utc_offset_microsecs; 
+	spl_t			s;
+	struct bintime 		bt;
+	struct bintime		monotonic_bt;
+	struct latched_time	monotonic_time;
+	uint64_t		monotonic_usec_total;
+	clock_sec_t             sys2, monotonic_sec;
+        clock_usec_t            microsys2, monotonic_usec;
+        size_t                  size;
+
+	//Get PMU time with offset and corresponding sys time
+	PEGetUTCTimeOfDay(&secs, &microsecs);
+	clock_get_system_microtime(&sys, &microsys);
+
+	/*
+	 * If the platform has a monotonic clock, use kern.monotonicclock_usecs
+	 * to estimate the sleep/wake time, otherwise use the PMU and adjustments
+	 * provided through settimeofday to estimate the sleep time.
+	 * NOTE: the latter case relies that the kernel is the only component
+	 * to set the PMU offset.
+	 */
+	size = sizeof(monotonic_time);
+	if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) {
+		has_monotonic_clock = 0;
+		os_log(OS_LOG_DEFAULT, "%s system does not have monotonic clock.\n", __func__);
+	} else {
+		has_monotonic_clock = 1;
+		monotonic_usec_total = monotonic_time.monotonic_time_usec;
+		absolutetime_to_microtime(monotonic_time.mach_time, &sys2, &microsys2);
+		os_log(OS_LOG_DEFAULT, "%s system has monotonic clock.\n", __func__);
+	}
 
 	s = splclock();
-	simple_lock(&clock_lock);
+	clock_lock();
 
-	if (--calend_adjactive == 0) {
-		interval = calend_adjust();
-		if (interval != 0) {
-			clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-																&calend_adjdeadline);
+	commpage_disable_timestamp();
 
-			if (!timer_call_enter(&calend_adjcall, calend_adjdeadline))
-				calend_adjactive++;
-		}
+	utc_offset_secs = secs;
+	utc_offset_microsecs = microsecs;
+
+#if DEVELOPMENT || DEBUG
+	last_utc_sec = secs;
+	last_utc_usec = microsecs;
+	last_sys_sec = sys;
+	last_sys_usec = microsys;
+	if (secs > max_utc_sec)
+		max_utc_sec = secs;
+#endif
+
+	/*
+	 * We normally expect the UTC clock to be always-on and produce
+	 * greater readings than the tick counter.  There may be corner cases
+	 * due to differing clock resolutions (UTC clock is likely lower) and
+	 * and errors reading the UTC clock (some implementations return 0
+	 * on error) in which that doesn't hold true.  Bring the UTC measurements
+	 * in-line with the tick counter measurements as a best effort in that case.
+	 */
+	//FIXME if the current time is prior than 1970 secs will be negative
+	if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) {
+		os_log(OS_LOG_DEFAULT, "%s WARNING: PMU offset is less then sys PMU %lu s %d u sys %lu s %d u\n",
+			__func__, (unsigned long) secs, microsecs, (unsigned long)sys, microsys);
+		secs = utc_offset_secs = sys;
+		microsecs = utc_offset_microsecs = microsys;
+	}
+
+	// PMU time with offset - sys
+	// This macro stores the subtraction result in utc_offset_secs and utc_offset_microsecs
+	TIME_SUB(utc_offset_secs, sys, utc_offset_microsecs, microsys, USEC_PER_SEC);
+
+	clock2bintime(&utc_offset_secs, &utc_offset_microsecs, &bt);
+
+	/*
+	 *	Initialize the boot time based on the platform clock.
+	 */
+	clock_boottime = secs;
+	clock_boottime_usec = microsecs;
+	commpage_update_boottime(clock_boottime * USEC_PER_SEC + clock_boottime_usec);
+
+	nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec);
+	clock_calend.boottime = bt;
+	clock_calend.bintime = bt;
+	clock_calend.offset.sec = 0;
+	clock_calend.offset.frac = 0;
+
+	clock_calend.tick_scale_x = (uint64_t)1 << 63;
+	clock_calend.tick_scale_x /= ticks_per_sec;
+	clock_calend.tick_scale_x *= 2;
+
+	clock_calend.s_scale_ns = NSEC_PER_SEC;
+	clock_calend.s_adj_nsx = 0;
+
+	if (has_monotonic_clock) {
+
+		monotonic_sec = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
+		monotonic_usec = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+
+		// PMU time without offset - sys
+		// This macro stores the subtraction result in monotonic_sec and monotonic_usec
+		TIME_SUB(monotonic_sec, sys2, monotonic_usec, microsys2, USEC_PER_SEC);
+		clock2bintime(&monotonic_sec, &monotonic_usec, &monotonic_bt);
+
+		// set the baseleep as the difference between monotonic clock - sys
+		clock_calend.basesleep = monotonic_bt;
+	} else {
+		// set the baseleep as the difference between PMU clock - sys
+		clock_calend.basesleep = bt;
 	}
+	commpage_update_mach_continuous_time(mach_absolutetime_asleep);
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp = clock_calend;
+#endif
 
-	simple_unlock(&clock_lock);
+	clock_unlock();
 	splx(s);
+
+        print_all_clock_variables(__func__, &secs, &microsecs, &sys, &microsys, &clock_calend_cp);
+
+	/*
+	 *	Send host notifications.
+	 */
+	host_notify_calendar_change();
+	
+#if CONFIG_DTRACE
+	clock_track_calend_nowait();
+#endif
 }
 
-static uint32_t
-calend_adjust(void)
-{
-	uint64_t		now, t64;
-	int32_t			delta;
-	uint32_t		interval = 0;
 
-    commpage_set_timestamp(0,0,0);
+void
+clock_wakeup_calendar(void)
+{
+	clock_sec_t		sys;
+	clock_sec_t		secs;
+	clock_usec_t		microsys;
+	clock_usec_t		microsecs;
+	spl_t			s;
+	struct bintime		bt, last_sleep_bt;
+	clock_sec_t             basesleep_s, last_sleep_sec;
+	clock_usec_t            basesleep_us, last_sleep_usec;
+	struct latched_time     monotonic_time;
+	uint64_t		monotonic_usec_total;
+	size_t 			size;
+	clock_sec_t secs_copy;
+        clock_usec_t microsecs_copy;
+#if DEVELOPMENT || DEBUG
+	clock_sec_t utc_sec;
+	clock_usec_t utc_usec;
+	PEGetUTCTimeOfDay(&utc_sec, &utc_usec);
+#endif
 
-	now = mach_absolute_time();
+	/*
+	 * If the platform has the monotonic clock use that to
+	 * compute the sleep time. The monotonic clock does not have an offset
+	 * that can be modified, so nor kernel or userspace can change the time
+	 * of this clock, it can only monotonically increase over time.
+	 * During sleep mach_absolute_time does not tick,
+	 * so the sleep time is the difference betwen the current monotonic time
+	 * less the absolute time and the previous difference stored at wake time.
+	 *
+	 * basesleep = monotonic - sys ---> computed at last wake
+	 * sleep_time = (monotonic - sys) - basesleep
+	 *
+	 * If the platform does not support monotonic time we use the PMU time
+	 * to compute the last sleep.
+	 * The PMU time is the monotonic clock + an offset that can be set
+	 * by kernel.
+	 *
+	 * IMPORTANT:
+	 * We assume that only the kernel is setting the offset of the PMU and that
+	 * it is doing it only througth the settimeofday interface.
+	 *
+	 * basesleep is the different between the PMU time and the mach_absolute_time
+	 * at wake.
+	 * During awake time settimeofday can change the PMU offset by a delta,
+	 * and basesleep is shifted by the same delta applyed to the PMU. So the sleep
+	 * time computation becomes:
+	 *
+	 * PMU = monotonic + PMU_offset
+	 * basesleep = PMU - sys ---> computed at last wake
+	 * basesleep += settimeofday_delta
+	 * PMU_offset += settimeofday_delta
+	 * sleep_time = (PMU - sys) - basesleep
+	 */
+	if (has_monotonic_clock) {
+		//Get monotonic time with corresponding sys time
+		size = sizeof(monotonic_time);
+		if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) {
+			panic("%s: could not call kern.monotonicclock_usecs", __func__);
+		}
+		monotonic_usec_total = monotonic_time.monotonic_time_usec;
+		absolutetime_to_microtime(monotonic_time.mach_time, &sys, &microsys);
+
+		secs = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
+		microsecs = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+	} else {
+		//Get PMU time with offset and corresponding sys time
+		PEGetUTCTimeOfDay(&secs, &microsecs);
+		clock_get_system_microtime(&sys, &microsys);
 
-	delta = calend_adjdelta;
+	}
 
-	if (delta > 0) {
-		clock_calend.offset += calend_adjoffset;
+	s = splclock();
+	clock_lock();
+	
+	commpage_disable_timestamp();
+
+	secs_copy = secs;
+	microsecs_copy = microsecs;
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp1 = clock_calend;
+#endif /* DEVELOPMENT || DEBUG */
+
+#if DEVELOPMENT || DEBUG
+	last_utc_sec = secs;
+	last_utc_usec = microsecs;
+	last_sys_sec = sys;
+	last_sys_usec = microsys;
+	if (secs > max_utc_sec)
+		max_utc_sec = secs;
+#endif
+	/*
+	 * We normally expect the UTC clock to be always-on and produce
+	 * greater readings than the tick counter.  There may be corner cases
+	 * due to differing clock resolutions (UTC clock is likely lower) and
+	 * and errors reading the UTC clock (some implementations return 0
+	 * on error) in which that doesn't hold true.  Bring the UTC measurements
+	 * in-line with the tick counter measurements as a best effort in that case.
+	 */
+	//FIXME if the current time is prior than 1970 secs will be negative
+	if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) {
+		os_log(OS_LOG_DEFAULT, "%s WARNING: %s is less then sys %s %lu s %d u sys %lu s %d u\n",
+			__func__, (has_monotonic_clock)?"monotonic":"PMU", (has_monotonic_clock)?"monotonic":"PMU", (unsigned long)secs, microsecs, (unsigned long)sys, microsys);
+		secs = sys;
+		microsecs = microsys;
+	}
 
-		calend_adjtotal -= delta;
-		if (delta > calend_adjtotal) {
-			calend_adjdelta = delta = calend_adjtotal;
+	// PMU or monotonic - sys
+	// This macro stores the subtraction result in secs and microsecs
+	TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC);
+	clock2bintime(&secs, &microsecs, &bt);
 
-			nanoseconds_to_absolutetime((uint64_t)delta, &t64);
-			calend_adjoffset = t64;
-		}
+	/*
+	 * Safety belt: the UTC clock will likely have a lower resolution than the tick counter.
+	 * It's also possible that the device didn't fully transition to the powered-off state on
+	 * the most recent sleep, so the tick counter may not have reset or may have only briefly
+	 * tured off.  In that case it's possible for the difference between the UTC clock and the
+	 * tick counter to be less than the previously recorded value in clock.calend.basesleep.
+	 * In that case simply record that we slept for 0 ticks.
+	 */ 
+	if ((bt.sec > clock_calend.basesleep.sec) ||
+	    ((bt.sec == clock_calend.basesleep.sec) && (bt.frac > clock_calend.basesleep.frac))) {
+
+		//last_sleep is the difference between current PMU or monotonic - abs and last wake PMU or monotonic - abs
+		last_sleep_bt = bt;
+		bintime_sub(&last_sleep_bt, &clock_calend.basesleep);
+
+		//set baseseep to current PMU or monotonic - abs
+		clock_calend.basesleep = bt;
+		bintime2usclock(&last_sleep_bt, &last_sleep_sec, &last_sleep_usec);
+		bintime2absolutetime(&last_sleep_bt, &mach_absolutetime_last_sleep);
+		mach_absolutetime_asleep += mach_absolutetime_last_sleep;
+
+		bintime_add(&clock_calend.offset, &last_sleep_bt);
+		bintime_add(&clock_calend.bintime, &last_sleep_bt);
+
+	} else{
+		mach_absolutetime_last_sleep = 0;
+		last_sleep_sec = last_sleep_usec = 0;
+		bintime2usclock(&clock_calend.basesleep, &basesleep_s, &basesleep_us);
+		os_log(OS_LOG_DEFAULT, "%s WARNING: basesleep (%lu s %d u)  > %s-sys (%lu s %d u) \n",
+			__func__, (unsigned long) basesleep_s, basesleep_us, (has_monotonic_clock)?"monotonic":"PMU", (unsigned long) secs_copy, microsecs_copy );
 	}
-	else
-	if (delta < 0) {
-		clock_calend.offset -= calend_adjoffset;
 
-		calend_adjtotal -= delta;
-		if (delta < calend_adjtotal) {
-			calend_adjdelta = delta = calend_adjtotal;
+	KERNEL_DEBUG_CONSTANT(
+		  MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE,
+		  (uintptr_t) mach_absolutetime_last_sleep,
+		  (uintptr_t) mach_absolutetime_asleep,
+		  (uintptr_t) (mach_absolutetime_last_sleep >> 32),
+		  (uintptr_t) (mach_absolutetime_asleep >> 32),
+		  0);
 
-			nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
-			calend_adjoffset = t64;
-		}
+	commpage_update_mach_continuous_time(mach_absolutetime_asleep);
+	adjust_cont_time_thread_calls();
 
-		if (calend_adjdelta != 0)
-			calend_adjstart = now;
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp = clock_calend;
+#endif
+
+	clock_unlock();
+	splx(s);
+
+#if DEVELOPMENT || DEBUG
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "PMU was %lu s %d u\n",(unsigned long) utc_sec, utc_usec);
+		os_log(OS_LOG_DEFAULT, "last sleep was %lu s %d u\n",(unsigned long) last_sleep_sec, last_sleep_usec);
+		print_all_clock_variables("clock_wakeup_calendar:BEFORE",
+	                          &secs_copy, &microsecs_copy, &sys, &microsys, &clock_calend_cp1);
+		print_all_clock_variables("clock_wakeup_calendar:AFTER", NULL, NULL, NULL, NULL, &clock_calend_cp);
 	}
+#endif /* DEVELOPMENT || DEBUG */
 
-	if (calend_adjdelta != 0)
-		interval = calend_adjinterval;
+	host_notify_calendar_change();
 
-	return (interval);
+#if CONFIG_DTRACE
+	clock_track_calend_nowait();
+#endif
 }
 
+
 /*
- *	clock_wakeup_calendar:
+ *	clock_get_boottime_nanotime:
  *
- *	Interface to power management, used
- *	to initiate the reset of the calendar
- *	on wake from sleep event.
+ *	Return the boottime, used by sysctl.
  */
 void
-clock_wakeup_calendar(void)
+clock_get_boottime_nanotime(
+	clock_sec_t			*secs,
+	clock_nsec_t		*nanosecs)
 {
-	thread_call_enter(&calend_wakecall);
+	spl_t	s;
+
+	s = splclock();
+	clock_lock();
+
+	*secs = (clock_sec_t)clock_boottime;
+	*nanosecs = (clock_nsec_t)clock_boottime_usec * NSEC_PER_USEC;
+
+	clock_unlock();
+	splx(s);
 }
 
+/*
+ *	clock_get_boottime_nanotime:
+ *
+ *	Return the boottime, used by sysctl.
+ */
+void
+clock_get_boottime_microtime(
+	clock_sec_t			*secs,
+	clock_usec_t		*microsecs)
+{
+	spl_t	s;
+
+	s = splclock();
+	clock_lock();
+
+	*secs = (clock_sec_t)clock_boottime;
+	*microsecs = (clock_nsec_t)clock_boottime_usec;
+
+	clock_unlock();
+	splx(s);
+}
+
+
 /*
  *	Wait / delay routines.
  */
@@ -653,6 +1423,15 @@ mach_wait_until_continue(
 	/*NOTREACHED*/
 }
 
+/*
+ * mach_wait_until_trap: Suspend execution of calling thread until the specified time has passed
+ *
+ * Parameters:    args->deadline          Amount of time to wait
+ *
+ * Returns:        0                      Success
+ *                !0                      Not success           
+ *
+ */
 kern_return_t
 mach_wait_until_trap(
 	struct mach_wait_until_trap_args	*args)
@@ -660,7 +1439,8 @@ mach_wait_until_trap(
 	uint64_t		deadline = args->deadline;
 	wait_result_t	wresult;
 
-	wresult = assert_wait_deadline((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, deadline);
+	wresult = assert_wait_deadline_with_leeway((event_t)mach_wait_until_trap, THREAD_ABORTSAFE,
+						   TIMEOUT_URGENCY_USER_NORMAL, deadline, 0);
 	if (wresult == THREAD_WAITING)
 		wresult = thread_block(mach_wait_until_continue);
 
@@ -676,12 +1456,50 @@ clock_delay_until(
 	if (now >= deadline)
 		return;
 
-	if (	(deadline - now) < (8 * sched_cswtime)	||
+	_clock_delay_until_deadline(deadline - now, deadline);
+}
+
+/*
+ * Preserve the original precise interval that the client
+ * requested for comparison to the spin threshold.
+ */
+void
+_clock_delay_until_deadline(
+	uint64_t		interval,
+	uint64_t		deadline)
+{
+	_clock_delay_until_deadline_with_leeway(interval, deadline, 0);
+}
+
+/*
+ * Like _clock_delay_until_deadline, but it accepts a
+ * leeway value.
+ */
+void
+_clock_delay_until_deadline_with_leeway(
+	uint64_t		interval,
+	uint64_t		deadline,
+	uint64_t		leeway)
+{
+
+	if (interval == 0)
+		return;
+
+	if (	ml_delay_should_spin(interval)	||
 			get_preemption_level() != 0				||
-			ml_get_interrupts_enabled() == FALSE	)
-		machine_delay_until(deadline);
-	else {
-		assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline - sched_cswtime);
+			ml_get_interrupts_enabled() == FALSE	) {
+		machine_delay_until(interval, deadline);
+	} else {
+		/*
+		 * For now, assume a leeway request of 0 means the client does not want a leeway
+		 * value. We may want to change this interpretation in the future.
+		 */
+
+		if (leeway) {
+			assert_wait_deadline_with_leeway((event_t)clock_delay_until, THREAD_UNINT, TIMEOUT_URGENCY_LEEWAY, deadline, leeway);
+		} else {
+			assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
+		}
 
 		thread_block(THREAD_CONTINUE_NULL);
 	}
@@ -692,11 +1510,26 @@ delay_for_interval(
 	uint32_t		interval,
 	uint32_t		scale_factor)
 {
-	uint64_t		end;
+	uint64_t		abstime;
+
+	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
+
+	_clock_delay_until_deadline(abstime, mach_absolute_time() + abstime);
+}
+
+void
+delay_for_interval_with_leeway(
+	uint32_t		interval,
+	uint32_t		leeway,
+	uint32_t		scale_factor)
+{
+	uint64_t		abstime_interval;
+	uint64_t		abstime_leeway;
 
-	clock_interval_to_deadline(interval, scale_factor, &end);
+	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime_interval);
+	clock_interval_to_absolutetime_interval(leeway, scale_factor, &abstime_leeway);
 
-	clock_delay_until(end);
+	_clock_delay_until_deadline_with_leeway(abstime_interval, mach_absolute_time() + abstime_interval, abstime_leeway);
 }
 
 void
@@ -730,6 +1563,14 @@ clock_absolutetime_interval_to_deadline(
 	*result = mach_absolute_time() + abstime;
 }
 
+void
+clock_continuoustime_interval_to_deadline(
+	uint64_t			conttime,
+	uint64_t			*result)
+{
+	*result = mach_continuous_time() + conttime;
+}
+
 void
 clock_get_uptime(
 	uint64_t	*result)
@@ -755,3 +1596,145 @@ clock_deadline_for_periodic_event(
 			*deadline = abstime + interval;
 	}
 }
+
+uint64_t
+mach_continuous_time(void)
+{
+	while(1) {	
+		uint64_t read1 = mach_absolutetime_asleep;
+		uint64_t absolute = mach_absolute_time();
+		OSMemoryBarrier();
+		uint64_t read2 = mach_absolutetime_asleep;
+
+		if(__builtin_expect(read1 == read2, 1)) {
+			return absolute + read1;
+		}
+	}
+}
+
+uint64_t
+mach_continuous_approximate_time(void)
+{
+	while(1) {
+		uint64_t read1 = mach_absolutetime_asleep;
+		uint64_t absolute = mach_approximate_time();
+		OSMemoryBarrier();
+		uint64_t read2 = mach_absolutetime_asleep;
+
+		if(__builtin_expect(read1 == read2, 1)) {
+			return absolute + read1;
+		}
+	}
+}
+
+/*
+ * continuoustime_to_absolutetime
+ * Must be called with interrupts disabled
+ * Returned value is only valid until the next update to
+ * mach_continuous_time 
+ */
+uint64_t
+continuoustime_to_absolutetime(uint64_t conttime) {
+	if (conttime <= mach_absolutetime_asleep)
+		return 0;
+	else
+		return conttime - mach_absolutetime_asleep;
+}
+
+/*
+ * absolutetime_to_continuoustime
+ * Must be called with interrupts disabled
+ * Returned value is only valid until the next update to
+ * mach_continuous_time 
+ */
+uint64_t
+absolutetime_to_continuoustime(uint64_t abstime) {
+	return abstime + mach_absolutetime_asleep;
+}
+
+#if	CONFIG_DTRACE
+
+/*
+ * clock_get_calendar_nanotime_nowait
+ *
+ * Description:	Non-blocking version of clock_get_calendar_nanotime()
+ *
+ * Notes:	This function operates by separately tracking calendar time
+ *		updates using a two element structure to copy the calendar
+ *		state, which may be asynchronously modified.  It utilizes
+ *		barrier instructions in the tracking process and in the local
+ *		stable snapshot process in order to ensure that a consistent
+ *		snapshot is used to perform the calculation.
+ */
+void
+clock_get_calendar_nanotime_nowait(
+	clock_sec_t			*secs,
+	clock_nsec_t		*nanosecs)
+{
+	int i = 0;
+	uint64_t		now;
+	struct unlocked_clock_calend stable;
+	struct bintime bt;
+
+	for (;;) {
+		stable = flipflop[i];		/* take snapshot */
+
+		/*
+		 * Use a barrier instructions to ensure atomicity.  We AND
+		 * off the "in progress" bit to get the current generation
+		 * count.
+		 */
+		(void)hw_atomic_and(&stable.gen, ~(uint32_t)1);
+
+		/*
+		 * If an update _is_ in progress, the generation count will be
+		 * off by one, if it _was_ in progress, it will be off by two,
+		 * and if we caught it at a good time, it will be equal (and
+		 * our snapshot is threfore stable).
+		 */
+		if (flipflop[i].gen == stable.gen)
+			break;
+
+		/* Switch to the other element of the flipflop, and try again. */
+		i ^= 1;
+	}
+
+	now = mach_absolute_time();
+
+	bt = get_scaled_time(now);
+
+	bintime_add(&bt, &clock_calend.bintime);
+
+	bintime2nsclock(&bt, secs, nanosecs);
+}
+
+static void 
+clock_track_calend_nowait(void)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct clock_calend tmp = clock_calend;
+
+		/*
+		 * Set the low bit if the generation count; since we use a
+		 * barrier instruction to do this, we are guaranteed that this
+		 * will flag an update in progress to an async caller trying
+		 * to examine the contents.
+		 */
+		(void)hw_atomic_or(&flipflop[i].gen, 1);
+
+		flipflop[i].calend = tmp;
+
+		/*
+		 * Increment the generation count to clear the low bit to
+		 * signal completion.  If a caller compares the generation
+		 * count after taking a copy while in progress, the count
+		 * will be off by two.
+		 */
+		(void)hw_atomic_add(&flipflop[i].gen, 1);
+	}
+}
+
+#endif	/* CONFIG_DTRACE */
+