X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/55e303ae13a4cf49d70f2294092726f2fffb9ef2..d26ffc64f583ab2d29df48f13518685602bc8832:/osfmk/kern/clock.c

diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c
index 79b348c77..9bd9f3b0e 100644
--- a/osfmk/kern/clock.c
+++ b/osfmk/kern/clock.c
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,801 +23,1559 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
 /*
- *	File:		kern/clock.c
- *	Purpose:	Routines for the creation and use of kernel
- *			alarm clock services. This file and the ipc
- *			routines in kern/ipc_clock.c constitute the
- *			machine-independent clock service layer.
+ */
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)time.h	8.5 (Berkeley) 5/4/95
+ * $FreeBSD$
  */
 
-#include <cpus.h>
-#include <mach_host.h>
-
-#include <mach/boolean.h>
-#include <mach/processor_info.h>
-#include <mach/vm_param.h>
-#include <machine/mach_param.h>
-#include <kern/cpu_number.h>
-#include <kern/misc_protos.h>
-#include <kern/lock.h>
-#include <kern/host.h>
+#include <mach/mach_types.h>
+
 #include <kern/spl.h>
 #include <kern/sched_prim.h>
 #include <kern/thread.h>
-#include <kern/thread_swap.h>
-#include <kern/ipc_host.h>
 #include <kern/clock.h>
-#include <kern/zalloc.h>
-#include <ipc/ipc_port.h>
+#include <kern/host_notify.h>
+#include <kern/thread_call.h>
+#include <libkern/OSAtomic.h>
+
+#include <IOKit/IOPlatformExpert.h>
 
-#include <mach/mach_syscalls.h>
-#include <mach/clock_reply.h>
+#include <machine/commpage.h>
+#include <machine/config.h>
+#include <machine/machine_routines.h>
+
+#include <mach/mach_traps.h>
 #include <mach/mach_time.h>
 
-/*
- * Exported interface
- */
+#include <sys/kdebug.h>
+#include <sys/timex.h>
+#include <kern/arithmetic_128.h>
+#include <os/log.h>
+
+uint32_t	hz_tick_interval = 1;
+static uint64_t has_monotonic_clock = 0;
+
+decl_simple_lock_data(,clock_lock)
+lck_grp_attr_t * settime_lock_grp_attr;
+lck_grp_t * settime_lock_grp;
+lck_attr_t * settime_lock_attr;
+lck_mtx_t settime_lock;
+
+#define clock_lock()	\
+	simple_lock(&clock_lock)
+
+#define clock_unlock()	\
+	simple_unlock(&clock_lock)
+
+#define clock_lock_init()	\
+	simple_lock_init(&clock_lock, 0)
+
+#ifdef kdp_simple_lock_is_acquired
+boolean_t kdp_clock_is_locked(void)	/* takes no arguments; returns kdp_simple_lock_is_acquired() for clock_lock */
+{
+	return kdp_simple_lock_is_acquired(&clock_lock);
+}
+#endif
+
+struct bintime {
+	time_t	sec;
+	uint64_t frac;
+};
+
+static __inline void
+bintime_addx(struct bintime *_bt, uint64_t _x)
+{
+	/* Add _x to the 64 bit fraction; a wrapped fraction carries one second. */
+	const uint64_t before = _bt->frac;
+	const uint64_t after = before + _x;
+
+	_bt->frac = after;
+	_bt->sec += (after < before) ? 1 : 0;
+}
+
+static __inline void
+bintime_subx(struct bintime *_bt, uint64_t _x)
+{
+	/* Subtract _x from the 64 bit fraction; an underflow borrows one second. */
+	const uint64_t before = _bt->frac;
+	const uint64_t after = before - _x;
+
+	_bt->frac = after;
+	_bt->sec -= (after > before) ? 1 : 0;
+}
+
+static __inline void
+bintime_addns(struct bintime *bt, uint64_t ns)
+{
+	const uint64_t whole_secs = ns / (uint64_t)NSEC_PER_SEC;
+	const uint64_t rem_ns = ns % (uint64_t)NSEC_PER_SEC;
+
+	bt->sec += whole_secs;	/* whole seconds go straight into sec */
+	/* 18446744073 = int(2^64 / NSEC_PER_SEC): scales the leftover ns to a 64 bit frac of s */
+	if (rem_ns != 0)
+		bintime_addx(bt, rem_ns * (uint64_t)18446744073LL);
+}
 
-#include <mach/clock_server.h>
-#include <mach/mach_host_server.h>
+static __inline void
+bintime_subns(struct bintime *bt, uint64_t ns)
+{
+	const uint64_t whole_secs = ns / (uint64_t)NSEC_PER_SEC;
+	const uint64_t rem_ns = ns % (uint64_t)NSEC_PER_SEC;
+
+	bt->sec -= whole_secs;	/* whole seconds come straight out of sec */
+	/* 18446744073 = int(2^64 / NSEC_PER_SEC): scales the leftover ns to a 64 bit frac of s */
+	if (rem_ns != 0)
+		bintime_subx(bt, rem_ns * (uint64_t)18446744073LL);
+}
 
-/* local data declarations */
-decl_simple_lock_data(static,ClockLock)		/* clock system synchronization */
-static struct	zone		*alarm_zone;	/* zone for user alarms */
-static struct	alarm		*alrmfree;		/* alarm free list pointer */
-static struct	alarm		*alrmdone;		/* alarm done list pointer */
-static long					alrm_seqno;		/* uniquely identifies alarms */
-static thread_call_data_t	alarm_deliver;
+static __inline void
+bintime_addxns(struct bintime *bt, uint64_t a, int64_t xns)
+{
+	/* Add a * xns to bt; xns is signed, in 64 bit frac of ns. Negate as unsigned so INT64_MIN stays defined. */
+	const uint64_t uxns = (xns > 0) ? (uint64_t)xns : (0 - (uint64_t)xns);
+	const uint64_t hi_ns = multi_overflow(a, uxns);	/* presumably the upper 64 bits of a * uxns (whole ns) -- confirm in arithmetic_128.h */
+	const uint64_t lo_x = (a * uxns) / (uint64_t)NSEC_PER_SEC;	/* low 64 bits: frac of ns -> frac of s */
+
+	if (xns > 0) {
+		if (hi_ns)
+			bintime_addns(bt, hi_ns);
+		bintime_addx(bt, lo_x);
+	} else {
+		if (hi_ns)
+			bintime_subns(bt, hi_ns);
+		bintime_subx(bt, lo_x);
+	}
+}
+
+
+static __inline void
+bintime_add(struct bintime *_bt, const struct bintime *_bt2)
+{
+	uint64_t prev_frac;	/* fraction before the add, used to detect wrap */
 
-decl_simple_lock_data(static,calend_adjlock)
+	prev_frac = _bt->frac;
+	_bt->frac = prev_frac + _bt2->frac;
+	/* a wrapped fraction carries one second; then add the whole seconds */
+	_bt->sec += (_bt->frac < prev_frac) ? 1 : 0;
+	_bt->sec += _bt2->sec;
+}
+
+static __inline void
+bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
+{
+	uint64_t prev_frac;	/* fraction before the subtract, used to detect underflow */
 
-static timer_call_data_t	calend_adjcall;
-static uint64_t				calend_adjinterval, calend_adjdeadline;
+	prev_frac = _bt->frac;
+	_bt->frac = prev_frac - _bt2->frac;
+	/* an underflowed fraction borrows one second; then subtract the whole seconds */
+	_bt->sec -= (_bt->frac > prev_frac) ? 1 : 0;
+	_bt->sec -= _bt2->sec;
+}
 
-static thread_call_data_t	calend_wakecall;
+static __inline void
+clock2bintime(const clock_sec_t *secs, const clock_usec_t *microsecs, struct bintime *_bt)
+{
 
-/* backwards compatibility */
-int             hz = HZ;                /* GET RID OF THIS !!! */
-int             tick = (1000000 / HZ);  /* GET RID OF THIS !!! */
+	_bt->sec = *secs;	/* whole seconds are copied unchanged */
+	/* 18446744073709 = int(2^64 / 1000000): scales microseconds to a 64 bit frac of s */
+	_bt->frac = *microsecs * (uint64_t)18446744073709LL;
+}
 
-/* external declarations */
-extern	struct clock	clock_list[];
-extern	int		clock_count;
+static __inline void
+bintime2usclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *microsecs)
+{
 
-/* local clock subroutines */
-static
-void	flush_alarms(
-			clock_t			clock);
+	*secs = _bt->sec;	/* whole seconds are copied unchanged */
+	*microsecs = ((uint64_t)USEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;	/* only the top 32 bits of frac are used */
+}
 
-static
-void	post_alarm(
-			clock_t			clock,
-			alarm_t			alarm);
+static __inline void
+bintime2nsclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *nanosecs)
+{
 
-static
-int		check_time(
-			alarm_type_t	alarm_type,
-			mach_timespec_t	*alarm_time,
-			mach_timespec_t	*clock_time);
+	*secs = _bt->sec;	/* NOTE(review): nanosecs is typed clock_usec_t here but clock_nsec_t in clock_get_calendar_nanotime -- confirm the typedefs match */
+	*nanosecs = ((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;	/* only the top 32 bits of frac are used */
+}
 
-static
-void	clock_alarm_deliver(
-			thread_call_param_t		p0,
-			thread_call_param_t		p1);
+static __inline void
+bintime2absolutetime(const struct bintime *_bt, uint64_t *abs)
+{
+	const uint64_t frac_ns = ((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;	/* top 32 bits of frac, in ns */
+	const uint64_t total_ns = (uint64_t) _bt->sec * (uint64_t)NSEC_PER_SEC + frac_ns;
+	nanoseconds_to_absolutetime(total_ns, abs);
+}
 
-static
-void	calend_adjust_call(
-			timer_call_param_t	p0,
-			timer_call_param_t	p1);
+struct latched_time {
+        uint64_t monotonic_time_usec;
+        uint64_t mach_time;
+};
 
-static
-void	calend_dowakeup(
-			thread_call_param_t		p0,
-			thread_call_param_t		p1);
+extern int
+kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
 /*
- *	Macros to lock/unlock clock system.
+ *	Time of day (calendar) variables.
+ *
+ *	Algorithm:
+ *
+ *	TOD <- bintime + delta*scale
+ *
+ *	where :
+ * 	bintime is a cumulative offset that includes boottime and the scaled time elapsed between boottime and the last scale update.
+ *	delta is ticks elapsed since last scale update.
+ *	scale is computed according to an adjustment provided by ntp_kern.
  */
-#define LOCK_CLOCK(s)			\
-	s = splclock();			\
-	simple_lock(&ClockLock);
+static struct clock_calend {
+	uint64_t		s_scale_ns; /* scale applied to each whole second elapsed; the result is in ns */
+	int64_t			s_adj_nsx; /* additional adjustment applied to each whole second elapsed, expressed in 64 bit frac of ns */
+	uint64_t		tick_scale_x; /* scale applied to each tick elapsed; the result is in 64 bit frac of s */
+	uint64_t 		offset_count; /* abs time from which the current scales apply */
+	struct bintime		offset; /* cumulative offset expressed in (sec, 64 bits frac of a second) */
+	struct bintime		bintime; /* cumulative offset (it includes boottime) expressed in (sec, 64 bits frac of a second) */
+	struct bintime		boottime; /* boot time expressed in (sec, 64 bits frac of a second) */
+	struct bintime		basesleep; /* adjusted by update_basesleep() only when the platform has no monotonic clock */
+} clock_calend;
+
+static uint64_t ticks_per_sec; /* ticks in a second (expressed in abs time) */
+
+#if DEVELOPMENT || DEBUG
+clock_sec_t last_utc_sec = 0;
+clock_usec_t last_utc_usec = 0;
+clock_sec_t max_utc_sec = 0;
+clock_sec_t last_sys_sec = 0;
+clock_usec_t last_sys_usec = 0;
+#endif
+
+#if DEVELOPMENT || DEBUG
+extern int g_should_log_clock_adjustments;
+
+static void print_all_clock_variables(const char*, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* calend_cp);
+static void print_all_clock_variables_internal(const char *, struct clock_calend* calend_cp);
+#else
+#define print_all_clock_variables(...) do { } while (0)
+#define print_all_clock_variables_internal(...) do { } while (0)
+#endif
+
+#if	CONFIG_DTRACE
 
-#define UNLOCK_CLOCK(s)			\
-	simple_unlock(&ClockLock);	\
-	splx(s);
 
 /*
- * Configure the clock system. (Not sure if we need this,
- * as separate from clock_init()).
+ *	Unlocked calendar flipflop; this is used to track a clock_calend such
+ *	that we can safely access a snapshot of a valid  clock_calend structure
+ *	without needing to take any locks to do it.
+ *
+ *	The trick is to use a generation count and set the low bit when it is
+ *	being updated/read; by doing this, we guarantee, through use of the
+ *	hw_atomic functions, that the generation is incremented when the bit
+ *	is cleared atomically (by using a 1 bit add).
+ */
+static struct unlocked_clock_calend {
+	struct clock_calend	calend;		/* copy of calendar */
+	uint32_t		gen;		/* generation count */
+} flipflop[ 2];
+
+static void clock_track_calend_nowait(void);
+
+#endif
+
+void _clock_delay_until_deadline(uint64_t interval, uint64_t deadline);
+void _clock_delay_until_deadline_with_leeway(uint64_t interval, uint64_t deadline, uint64_t leeway);
+
+/* Boottime variables*/
+static uint64_t clock_boottime;
+static uint32_t clock_boottime_usec;
+
+#define TIME_ADD(rsecs, secs, rfrac, frac, unit)	\
+MACRO_BEGIN											\
+	if (((rfrac) += (frac)) >= (unit)) {			\
+		(rfrac) -= (unit);							\
+		(rsecs) += 1;								\
+	}												\
+	(rsecs) += (secs);								\
+MACRO_END
+
+#define TIME_SUB(rsecs, secs, rfrac, frac, unit)	\
+MACRO_BEGIN											\
+	if ((int)((rfrac) -= (frac)) < 0) {				\
+		(rfrac) += (unit);							\
+		(rsecs) -= 1;								\
+	}												\
+	(rsecs) -= (secs);								\
+MACRO_END
+
+/*
+ *	clock_config:
+ *
+ *	Called once at boot to configure the clock subsystem.
  */
 void
 clock_config(void)
 {
-	clock_t			clock;
-	register int 	i;
-
-	if (cpu_number() != master_cpu)
-		panic("clock_config");
 
-	simple_lock_init(&ClockLock, ETAP_MISC_CLOCK);
-	thread_call_setup(&alarm_deliver, clock_alarm_deliver, NULL);
+	clock_lock_init();
 
-	simple_lock_init(&calend_adjlock, ETAP_MISC_CLOCK);
-	timer_call_setup(&calend_adjcall, calend_adjust_call, NULL);
+	settime_lock_grp_attr = lck_grp_attr_alloc_init();
+	settime_lock_grp = lck_grp_alloc_init("settime grp", settime_lock_grp_attr);
+	settime_lock_attr = lck_attr_alloc_init();
+	lck_mtx_init(&settime_lock, settime_lock_grp, settime_lock_attr);
 
-	thread_call_setup(&calend_wakecall, calend_dowakeup, NULL);
+	clock_oldconfig();
 
-	/*
-	 * Configure clock devices.
-	 */
-	for (i = 0; i < clock_count; i++) {
-		clock = &clock_list[i];
-		if (clock->cl_ops) {
-			if ((*clock->cl_ops->c_config)() == 0)
-				clock->cl_ops = 0;
-		}
-	}
+	ntp_init();
 
-	/* start alarm sequence numbers at 0 */
-	alrm_seqno = 0;
+	nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec);
 }
 
 /*
- * Initialize the clock system.
+ *	clock_init:
+ *
+ *	Called on a processor each time it is started.
  */
 void
 clock_init(void)
 {
-	clock_t			clock;
-	register int	i;
-
-	/*
-	 * Initialize basic clock structures.
-	 */
-	for (i = 0; i < clock_count; i++) {
-		clock = &clock_list[i];
-		if (clock->cl_ops)
-			(*clock->cl_ops->c_init)();
-	}
+	clock_oldinit();
 }
 
 /*
- * Called by machine dependent code
- * to initialize areas dependent on the
- * timebase value.  May be called multiple
- * times during start up.
+ *	clock_timebase_init:
+ *
+ *	Called by machine dependent code
+ *	to initialize areas dependent on the
+ *	timebase value.  May be called multiple
+ *	times during start up.
  */
 void
 clock_timebase_init(void)
 {
+	uint64_t	abstime;	/* receives NSEC_PER_SEC / 100 ns converted by nanoseconds_to_absolutetime() */
+
+	nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &abstime);
+	hz_tick_interval = (uint32_t)abstime;	/* narrowed to 32 bits for the global */
+
 	sched_timebase_init();
 }
 
 /*
- * Initialize the clock ipc service facility.
+ *	mach_timebase_info_trap:
+ *
+ *	User trap returns timebase constant.
+ */
+kern_return_t
+mach_timebase_info_trap(
+	struct mach_timebase_info_trap_args *args)
+{
+	mach_timebase_info_data_t	timebase = {};
+	const mach_vm_address_t		user_dst = args->info;
+
+	clock_timebase_info(&timebase);
+	/* The copyout result is not examined: the trap reports success regardless. */
+	copyout((void *)&timebase, user_dst, sizeof (timebase));
+
+	return KERN_SUCCESS;
+}
+
+/*
+ *	Calendar routines.
+ */
+
+/*
+ *	clock_get_calendar_microtime:
+ *
+ *	Returns the current calendar value,
+ *	microseconds as the fraction.
  */
 void
-clock_service_create(void)
+clock_get_calendar_microtime(
+	clock_sec_t		*secs,
+	clock_usec_t		*microsecs)
 {
-	clock_t			clock;
-	register int	i;
+	clock_get_calendar_absolute_and_microtime(secs, microsecs, NULL);
+}
+
+/*
+ * get_scale_factors_from_adj:
+ *
+ * computes scale factors from the value given in adjustment.
+ *
+ * Part of the code has been taken from tc_windup of FreeBSD
+ * written by Poul-Henning Kamp <phk@FreeBSD.ORG>, Julien Ridoux and
+ * Konstantin Belousov.
+ * https://github.com/freebsd/freebsd/blob/master/sys/kern/kern_tc.c
+ */
+static void
+get_scale_factors_from_adj(int64_t adjustment, uint64_t* tick_scale_x, uint64_t* s_scale_ns, int64_t* s_adj_nsx)
+{
+	uint64_t scale;
+	int64_t nano, frac;
+
+	/*-
+	 * Calculating the scaling factor.  We want the number of 1/2^64
+	 * fractions of a second per period of the hardware counter, taking
+	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
+	 * processing provides us with.
+	 *
+	 * The th_adjustment is nanoseconds per second with 32 bit binary
+	 * fraction and we want 64 bit binary fraction of second:
+	 *
+	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
+	 *
+	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
+	 * we can only multiply by about 850 without overflowing, that
+	 * leaves no suitably precise fractions for multiply before divide.
+	 *
+	 * Divide before multiply with a fraction of 2199/512 results in a
+	 * systematic undercompensation of 10PPM of th_adjustment.  On a
+	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
+	 *
+	 * We happily sacrifice the lowest of the 64 bits of our result
+	 * to the goddess of code clarity.
+	 *
+	 */
+	scale = (uint64_t)1 << 63;
+	scale += (adjustment / 1024) * 2199;
+	scale /= ticks_per_sec;
+	*tick_scale_x = scale * 2;
 
 	/*
-	 * Initialize ipc clock services.
+	 * hi part of adj
+	 * it contains ns (without fraction) to add to the next sec.
+	 * Get ns scale factor for the next sec.
 	 */
-	for (i = 0; i < clock_count; i++) {
-		clock = &clock_list[i];
-		if (clock->cl_ops) {
-			ipc_clock_init(clock);
-			ipc_clock_enable(clock);
-		}
-	}
+	nano = (adjustment > 0)? adjustment >> 32 : -((-adjustment) >> 32);
+	scale = (uint64_t) NSEC_PER_SEC;
+	scale += nano;
+	*s_scale_ns = scale;
 
 	/*
-	 * Perform miscellaneous late
-	 * initialization.
+	 * lo part of adj
+	 * it contains 32 bit frac of ns to add to the next sec.
+	 * Keep it as additional adjustment for the next sec (negated as int64_t, shifted as uint64_t: no signed overflow).
 	 */
-	i = sizeof(struct alarm);
-	alarm_zone = zinit(i, (4096/i)*i, 10*i, "alarms");
+	frac = (adjustment > 0)? (int64_t)((uint32_t) adjustment) : -(int64_t)((uint32_t) (-adjustment));
+	*s_adj_nsx = (int64_t)(((uint64_t) frac) << 32);	/* NOTE(review): a low half >= 2^31 still lands in the sign bit, as before */
+
+	return;
 }
 
 /*
- * Get the service port on a clock.
+ * scale_delta:
+ *
+ * returns a bintime struct representing delta scaled according to the
+ * scale factors provided to this function.
  */
-kern_return_t
-host_get_clock_service(
-	host_t			host,
-	clock_id_t		clock_id,
-	clock_t			*clock)		/* OUT */
-{
-	if (host == HOST_NULL || clock_id < 0 || clock_id >= clock_count) {
-		*clock = CLOCK_NULL;
-		return (KERN_INVALID_ARGUMENT);
+static struct bintime
+scale_delta(uint64_t delta, uint64_t tick_scale_x, uint64_t s_scale_ns, int64_t s_adj_nsx)
+{
+	uint64_t sec, new_ns, over;
+	struct bintime bt;
+
+	bt.sec = 0;
+	bt.frac = 0;
+
+	/*
+	 * If more than one second has elapsed,
+	 * scale fully elapsed seconds using scale factors for seconds.
+	 * s_scale_ns -> scales sec to ns.
+	 * s_adj_nsx -> additional adj expressed in 64 bit frac of ns to apply to each sec.
+	 */
+	if (delta > ticks_per_sec) {
+		sec = (delta/ticks_per_sec);
+		new_ns = sec * s_scale_ns;
+		bintime_addns(&bt, new_ns);
+		if (s_adj_nsx) {
+			if (sec == 1) {
+				/* shortcut, no overflow can occur */
+				if (s_adj_nsx > 0)
+					bintime_addx(&bt, (uint64_t)s_adj_nsx/ (uint64_t)NSEC_PER_SEC);
+				else
+					bintime_subx(&bt, (uint64_t)-s_adj_nsx/ (uint64_t)NSEC_PER_SEC);
+			}
+			else{
+				/*
+				 * s_adj_nsx is 64 bit frac of ns.
+				 * sec*s_adj_nsx might overflow in int64_t.
+				 * use bintime_addxns to not lose overflowed ns.
+				 */
+				bintime_addxns(&bt, sec, s_adj_nsx);
+			}
+		}
+		delta = (delta % ticks_per_sec);
+        }
+
+	over = multi_overflow(tick_scale_x, delta);
+	if(over){
+		bt.sec += over;
 	}
 
-	*clock = &clock_list[clock_id];
-	if ((*clock)->cl_ops == 0)
-		return (KERN_FAILURE);
-	return (KERN_SUCCESS);
+	/*
+	 * scale elapsed ticks using the scale factor for ticks.
+	 */
+	bintime_addx(&bt, delta * tick_scale_x);
+
+	return bt;
 }
 
 /*
- * Get the control port on a clock.
+ * get_scaled_time:
+ *
+ * returns the scaled time of the time elapsed from the last time
+ * scale factors were updated to now.
  */
-kern_return_t
-host_get_clock_control(
-	host_priv_t		host_priv,
-	clock_id_t		clock_id,
-	clock_t			*clock)		/* OUT */
-{
-	if (host_priv == HOST_PRIV_NULL || clock_id < 0 || clock_id >= clock_count) {
-		*clock = CLOCK_NULL;
-		return (KERN_INVALID_ARGUMENT);
-	}
+static struct bintime
+get_scaled_time(uint64_t now)
+{
+	uint64_t delta;
 
-	*clock = &clock_list[clock_id];
-	if ((*clock)->cl_ops == 0)
-		return (KERN_FAILURE);
-	return (KERN_SUCCESS);
+	/*
+	 * Compute ticks elapsed since last scale update.
+	 * This time will be scaled according to the value given by ntp kern.
+	 */
+	delta = now - clock_calend.offset_count;
+
+	return scale_delta(delta, clock_calend.tick_scale_x, clock_calend.s_scale_ns, clock_calend.s_adj_nsx);
+}
+
+static void
+clock_get_calendar_absolute_and_microtime_locked(
+	clock_sec_t		*secs,
+	clock_usec_t		*microsecs,
+	uint64_t    		*abstime)
+{
+	/* One timestamp feeds both the optional abstime output and the calendar value. */
+	const uint64_t sampled = mach_absolute_time();
+	struct bintime calendar = get_scaled_time(sampled);
+
+	if (abstime)
+		*abstime = sampled;
+
+	/* calendar = clock_calend.bintime + scaled time since the last scale update */
+	bintime_add(&calendar, &clock_calend.bintime);
+	bintime2usclock(&calendar, secs, microsecs);
+}
+
+static void
+clock_get_calendar_absolute_and_nanotime_locked(
+	clock_sec_t		*secs,
+	clock_usec_t		*nanosecs,
+	uint64_t    		*abstime)
+{
+	/* One timestamp feeds both the optional abstime output and the calendar value. */
+	const uint64_t sampled = mach_absolute_time();
+	struct bintime calendar = get_scaled_time(sampled);
+
+	if (abstime)
+		*abstime = sampled;
+
+	/* calendar = clock_calend.bintime + scaled time since the last scale update */
+	bintime_add(&calendar, &clock_calend.bintime);
+	bintime2nsclock(&calendar, secs, nanosecs);
 }
 
 /*
- * Get the current clock time.
+ *	clock_get_calendar_absolute_and_microtime:
+ *
+ *	Returns the current calendar value,
+ *	microseconds as the fraction. Also
+ *	returns mach_absolute_time if abstime
+ *	is not NULL.
  */
-kern_return_t
-clock_get_time(
-	clock_t			clock,
-	mach_timespec_t	*cur_time)	/* OUT */
+void
+clock_get_calendar_absolute_and_microtime(
+	clock_sec_t		*secs,
+	clock_usec_t		*microsecs,
+	uint64_t    		*abstime)
 {
-	if (clock == CLOCK_NULL)
-		return (KERN_INVALID_ARGUMENT);
-	return ((*clock->cl_ops->c_gettime)(cur_time));
+	spl_t			s;
+
+	s = splclock();
+	clock_lock();
+
+	clock_get_calendar_absolute_and_microtime_locked(secs, microsecs, abstime);
+
+	clock_unlock();
+	splx(s);
 }
 
 /*
- * Get clock attributes.
+ *	clock_get_calendar_nanotime:
+ *
+ *	Returns the current calendar value,
+ *	nanoseconds as the fraction.
+ *
+ *	Since we do not have an interface to
+ *	set the calendar with resolution greater
+ *	than a microsecond, we honor that here.
  */
-kern_return_t
-clock_get_attributes(
-	clock_t					clock,
-	clock_flavor_t			flavor,
-	clock_attr_t			attr,		/* OUT */
-	mach_msg_type_number_t	*count)		/* IN/OUT */
-{
-	kern_return_t	(*getattr)(
-						clock_flavor_t			flavor,
-						clock_attr_t			attr,
-						mach_msg_type_number_t	*count);
-
-	if (clock == CLOCK_NULL)
-		return (KERN_INVALID_ARGUMENT);
-	if (getattr = clock->cl_ops->c_getattr)
-		return((*getattr)(flavor, attr, count));
-	else
-		return (KERN_FAILURE);
+void
+clock_get_calendar_nanotime(
+	clock_sec_t		*secs,
+	clock_nsec_t		*nanosecs)
+{
+	/* Read the calendar with clock_lock held at splclock; no abstime output is requested. */
+	const spl_t		saved_spl = splclock();
+
+	clock_lock();
+
+	clock_get_calendar_absolute_and_nanotime_locked(secs, nanosecs, NULL);
+
+	clock_unlock();
+	splx(saved_spl);
 }
 
 /*
- * Set the current clock time.
+ *	clock_gettimeofday:
+ *
+ *	Kernel interface for commpage implementation of
+ *	gettimeofday() syscall.
+ *
+ *	Returns the current calendar value, and updates the
+ *	commpage info as appropriate.  Because most calls to
+ *	gettimeofday() are handled in user mode by the commpage,
+ *	this routine should be used infrequently.
  */
-kern_return_t
-clock_set_time(
-	clock_t			clock,
-	mach_timespec_t	new_time)
-{
-	mach_timespec_t	*clock_time;
-	kern_return_t	(*settime)(
-						mach_timespec_t		*clock_time);
-
-	if (clock == CLOCK_NULL)
-		return (KERN_INVALID_ARGUMENT);
-	if ((settime = clock->cl_ops->c_settime) == 0)
-		return (KERN_FAILURE);
-	clock_time = &new_time;
-	if (BAD_MACH_TIMESPEC(clock_time))
-		return (KERN_INVALID_VALUE);
+void
+clock_gettimeofday(
+	clock_sec_t	*secs,
+	clock_usec_t	*microsecs)
+{
+	clock_gettimeofday_and_absolute_time(secs, microsecs, NULL);	/* NULL: the mach_time output is not wanted */
+}
 
-	/*
-	 * Flush all outstanding alarms.
-	 */
-	flush_alarms(clock);
+void
+clock_gettimeofday_and_absolute_time(
+	clock_sec_t	*secs,
+	clock_usec_t	*microsecs,
+	uint64_t	*mach_time)
+{
+	struct bintime	calendar;
+	uint64_t	sampled;
+	spl_t		saved_spl;
+
+	saved_spl = splclock();
+	clock_lock();
+	/* calendar = clock_calend.bintime + scaled time since the last scale update */
+	sampled = mach_absolute_time();
+	calendar = get_scaled_time(sampled);
+	bintime_add(&calendar, &clock_calend.bintime);
+	bintime2usclock(&calendar, secs, microsecs);
 
+	/* Publish the same sample to the commpage while clock_lock is still held. */
+	clock_gettimeofday_set_commpage(sampled, calendar.sec, calendar.frac, clock_calend.tick_scale_x, ticks_per_sec);
+
+	clock_unlock();
+	splx(saved_spl);
+
+	if (mach_time)
+		*mach_time = sampled;
+}
+
+static void
+update_basesleep(struct bintime delta, bool forward)
+{
 	/*
-	 * Set the new time.
+	 * Update basesleep only if the platform does not have a monotonic clock.
+	 * In that case the sleep time computation uses the PMU time,
+	 * whose offset gets modified by settimeofday.
+	 * We don't need this for a monotonic clock because in that case the sleep
+	 * time computation is independent of the offset value of the PMU.
 	 */
-	return ((*settime)(clock_time));
+	if (!has_monotonic_clock) {
+		if (forward)
+			bintime_add(&clock_calend.basesleep, &delta);
+		else
+			bintime_sub(&clock_calend.basesleep, &delta);
+	}
 }
 
 /*
- * Set the clock alarm resolution.
+ *	clock_set_calendar_microtime:
+ *
+ *	Sets the current calendar value by
+ *	recalculating the epoch and offset
+ *	from the system clock.
+ *
+ *	Also adjusts the boottime to keep the
+ *	value consistent, writes the new
+ *	calendar value to the platform clock,
+ *	and sends calendar change notifications.
  */
-kern_return_t
-clock_set_attributes(
-	clock_t					clock,
-	clock_flavor_t			flavor,
-	clock_attr_t			attr,
-	mach_msg_type_number_t	count)
-{
-	kern_return_t	(*setattr)(
-						clock_flavor_t			flavor,
-						clock_attr_t			attr,
-						mach_msg_type_number_t	count);
-
-	if (clock == CLOCK_NULL)
-		return (KERN_INVALID_ARGUMENT);
-	if (setattr = clock->cl_ops->c_setattr)
-		return ((*setattr)(flavor, attr, count));
-	else
-		return (KERN_FAILURE);
-}
+void
+clock_set_calendar_microtime(
+	clock_sec_t		secs,
+	clock_usec_t		microsecs)
+{
+	uint64_t		absolutesys;
+	clock_sec_t		newsecs;
+	clock_sec_t		oldsecs;
+	clock_usec_t        	newmicrosecs;
+	clock_usec_t		oldmicrosecs;
+	uint64_t		commpage_value;
+	spl_t			s;
+	struct bintime		bt;
+	clock_sec_t		deltasecs;
+	clock_usec_t		deltamicrosecs;
+
+	newsecs = secs;
+	newmicrosecs = microsecs;
 
-/*
- * Setup a clock alarm.
- */
-kern_return_t
-clock_alarm(
-	clock_t					clock,
-	alarm_type_t			alarm_type,
-	mach_timespec_t			alarm_time,
-	ipc_port_t				alarm_port,
-	mach_msg_type_name_t	alarm_port_type)
-{
-	alarm_t					alarm;
-	mach_timespec_t			clock_time;
-	int						chkstat;
-	kern_return_t			reply_code;
-	spl_t					s;
-
-	if (clock == CLOCK_NULL)
-		return (KERN_INVALID_ARGUMENT);
-	if (clock->cl_ops->c_setalrm == 0)
-		return (KERN_FAILURE);
-	if (IP_VALID(alarm_port) == 0)
-		return (KERN_INVALID_CAPABILITY);
+	/*
+	 * The settime_lock mutex prevents racing settimeofday calls from updating the wall clock
+	 * and the platform clock concurrently.
+	 *
+	 * clock_lock cannot be used for this because it is acquired from interrupt context and
+	 * must be held with interrupts disabled, whereas updating the platform clock has to be
+	 * done with interrupts enabled.
+	 */
+	lck_mtx_lock(&settime_lock);
+
+	s = splclock();
+	clock_lock();
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp = clock_calend;
+#endif
+	commpage_disable_timestamp();
 
 	/*
-	 * Check alarm parameters. If parameters are invalid,
-	 * send alarm message immediately.
+	 *	Adjust the boottime based on the delta.
 	 */
-	(*clock->cl_ops->c_gettime)(&clock_time);
-	chkstat = check_time(alarm_type, &alarm_time, &clock_time);
-	if (chkstat <= 0) {
-		reply_code = (chkstat < 0 ? KERN_INVALID_VALUE : KERN_SUCCESS);
-		clock_alarm_reply(alarm_port, alarm_port_type,
-				  reply_code, alarm_type, clock_time);
-		return (KERN_SUCCESS);
+	clock_get_calendar_absolute_and_microtime_locked(&oldsecs, &oldmicrosecs, &absolutesys);
+
+#if DEVELOPMENT || DEBUG
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n",
+		       __func__, (unsigned long)oldsecs, oldmicrosecs, absolutesys);
+		os_log(OS_LOG_DEFAULT, "%s requested %lu s %d u\n",
+		       __func__,  (unsigned long)secs, microsecs );
 	}
+#endif
+
+	if (oldsecs < secs || (oldsecs == secs && oldmicrosecs < microsecs)) {
+		// moving forwards
+		deltasecs = secs;
+		deltamicrosecs = microsecs;
+
+		TIME_SUB(deltasecs, oldsecs, deltamicrosecs, oldmicrosecs, USEC_PER_SEC);
+
+#if DEVELOPMENT || DEBUG
+		if (g_should_log_clock_adjustments) {
+			os_log(OS_LOG_DEFAULT, "%s delta requested %lu s %d u\n",
+			       __func__, (unsigned long)deltasecs, deltamicrosecs);
+		}
+#endif
+
+		TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+		clock2bintime(&deltasecs, &deltamicrosecs, &bt);
+		bintime_add(&clock_calend.boottime, &bt);
+		update_basesleep(bt, TRUE);
+	} else {
+		// moving backwards
+		deltasecs = oldsecs;
+		deltamicrosecs = oldmicrosecs;
+
+		TIME_SUB(deltasecs, secs, deltamicrosecs, microsecs, USEC_PER_SEC);
+#if DEVELOPMENT || DEBUG
+		if (g_should_log_clock_adjustments) {
+			os_log(OS_LOG_DEFAULT, "%s negative delta requested %lu s %d u\n",
+			       __func__, (unsigned long)deltasecs, deltamicrosecs);
+		}
+#endif
+
+		TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+		clock2bintime(&deltasecs, &deltamicrosecs, &bt);
+		bintime_sub(&clock_calend.boottime, &bt);
+		update_basesleep(bt, FALSE);
+	}
+
+	clock_calend.bintime = clock_calend.boottime;
+	bintime_add(&clock_calend.bintime, &clock_calend.offset);
+
+	clock2bintime((clock_sec_t *) &secs, (clock_usec_t *) &microsecs, &bt);
+
+	clock_gettimeofday_set_commpage(absolutesys, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec);
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp1 = clock_calend;
+#endif
+
+	commpage_value = clock_boottime * USEC_PER_SEC + clock_boottime_usec;
+
+	clock_unlock();
+	splx(s);
 
 	/*
-	 * Get alarm and add to clock alarm list.
+	 *	Set the new value for the platform clock.
+	 *	This call might block, so interrupts must be enabled.
 	 */
+#if DEVELOPMENT || DEBUG
+	uint64_t now_b = mach_absolute_time();
+#endif
 
-	LOCK_CLOCK(s);
-	if ((alarm = alrmfree) == 0) {
-		UNLOCK_CLOCK(s);
-		alarm = (alarm_t) zalloc(alarm_zone);
-		if (alarm == 0)
-			return (KERN_RESOURCE_SHORTAGE);
-		LOCK_CLOCK(s);
+	PESetUTCTimeOfDay(newsecs, newmicrosecs);
+
+#if DEVELOPMENT || DEBUG
+	uint64_t now_a = mach_absolute_time();
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "%s mach bef PESet %llu mach aft %llu \n", __func__, now_b, now_a);
 	}
-	else
-		alrmfree = alarm->al_next;
-
-	alarm->al_status = ALARM_CLOCK;
-	alarm->al_time = alarm_time;
-	alarm->al_type = alarm_type;
-	alarm->al_port = alarm_port;
-	alarm->al_port_type = alarm_port_type;
-	alarm->al_clock = clock;
-	alarm->al_seqno = alrm_seqno++;
-	post_alarm(clock, alarm);
-	UNLOCK_CLOCK(s);
+#endif
 
-	return (KERN_SUCCESS);
+	print_all_clock_variables_internal(__func__, &clock_calend_cp);
+	print_all_clock_variables_internal(__func__, &clock_calend_cp1);
+
+	commpage_update_boottime(commpage_value);
+
+	/*
+	 *	Send host notifications.
+	 */
+	host_notify_calendar_change();
+	host_notify_calendar_set();
+
+#if CONFIG_DTRACE
+	clock_track_calend_nowait();
+#endif
+
+	lck_mtx_unlock(&settime_lock);
 }
 
+/*
+ * Sleep-time accounting, kept in the units produced by bintime2absolutetime().
+ * mach_absolutetime_asleep is the running total that clock_wakeup_calendar()
+ * grows on each wake and that mach_continuous_time() adds to
+ * mach_absolute_time(); mach_absolutetime_last_sleep is the length of the
+ * most recent sleep (set to 0 when a wake measures no sleep).
+ */
+uint64_t mach_absolutetime_asleep = 0;
+uint64_t mach_absolutetime_last_sleep = 0;
+
+/*
+ *	clock_get_calendar_uptime:
+ *
+ *	Store in *secs the whole-second part of
+ *	get_scaled_time(now) + clock_calend.offset,
+ *	sampled with the clock lock held at splclock.
+ */
+void
+clock_get_calendar_uptime(clock_sec_t *secs)
+{
+	struct bintime	uptime;
+	spl_t		ipl = splclock();
+
+	clock_lock();
+
+	/* the timestamp is taken only once the lock is held */
+	uptime = get_scaled_time(mach_absolute_time());
+	bintime_add(&uptime, &clock_calend.offset);
+	*secs = uptime.sec;
+
+	clock_unlock();
+	splx(ipl);
+}
+
+
 /*
- * Sleep on a clock. System trap. User-level libmach clock_sleep
- * interface call takes a mach_timespec_t sleep_time argument which it
- * converts to sleep_sec and sleep_nsec arguments which are then
- * passed to clock_sleep_trap.
+ * clock_update_calendar:
+ *
+ * called by ntp timer to update scale factors.
  */
-kern_return_t
-clock_sleep_trap(
-	mach_port_name_t	clock_name,
-	sleep_type_t		sleep_type,
-	int					sleep_sec,
-	int					sleep_nsec,
-	mach_timespec_t		*wakeup_time)
+void
+clock_update_calendar(void)
 {
-	clock_t				clock;
-	mach_timespec_t		swtime;
-	kern_return_t		rvalue;
 
-	/*
-	 * Convert the trap parameters.
-	 */
-	if (clock_name != MACH_PORT_NULL)
-		clock = port_name_to_clock(clock_name);
-	else
-		clock = &clock_list[SYSTEM_CLOCK];
+	uint64_t now, delta;
+	struct bintime bt;
+	spl_t s;
+	int64_t adjustment;
+
+	s = splclock();
+	clock_lock();
 
-	swtime.tv_sec  = sleep_sec;
-	swtime.tv_nsec = sleep_nsec;
+	now  = mach_absolute_time();
 
 	/*
-	 * Call the actual clock_sleep routine.
+	 * scale the time elapsed since the last update and
+	 * add it to offset.
 	 */
-	rvalue = clock_sleep_internal(clock, sleep_type, &swtime);
+	bt = get_scaled_time(now);
+	bintime_add(&clock_calend.offset, &bt);
 
 	/*
-	 * Return current time as wakeup time.
+	 * update the base from which the next scale factors are applied.
 	 */
-	if (rvalue != KERN_INVALID_ARGUMENT && rvalue != KERN_FAILURE) {
-		copyout((char *)&swtime, (char *)wakeup_time,
-			sizeof(mach_timespec_t));
-	}
-	return (rvalue);
-}	
+	delta = now - clock_calend.offset_count;
+	clock_calend.offset_count += delta;
 
-/*
- * Kernel internally callable clock sleep routine. The calling
- * thread is suspended until the requested sleep time is reached.
- */
-kern_return_t
-clock_sleep_internal(
-	clock_t				clock,
-	sleep_type_t		sleep_type,
-	mach_timespec_t		*sleep_time)
-{
-	alarm_t				alarm;
-	mach_timespec_t		clock_time;
-	kern_return_t		rvalue;
-	int					chkstat;
-	spl_t				s;
-
-	if (clock == CLOCK_NULL)
-		return (KERN_INVALID_ARGUMENT);
-	if (clock->cl_ops->c_setalrm == 0)
-		return (KERN_FAILURE);
+	clock_calend.bintime = clock_calend.offset;
+	bintime_add(&clock_calend.bintime, &clock_calend.boottime);
 
 	/*
-	 * Check sleep parameters. If parameters are invalid
-	 * return an error, otherwise post alarm request.
+	 * recompute next adjustment.
 	 */
-	(*clock->cl_ops->c_gettime)(&clock_time);
+	ntp_update_second(&adjustment, clock_calend.bintime.sec);
 
-	chkstat = check_time(sleep_type, sleep_time, &clock_time);
-	if (chkstat < 0)
-		return (KERN_INVALID_VALUE);
-	rvalue = KERN_SUCCESS;
-	if (chkstat > 0) {
-		wait_result_t wait_result;
+#if DEVELOPMENT || DEBUG
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "%s adjustment %lld\n", __func__, adjustment);
+	}
+#endif
+	
+	/*
+	 * recomputing scale factors.
+	 */
+	get_scale_factors_from_adj(adjustment, &clock_calend.tick_scale_x, &clock_calend.s_scale_ns, &clock_calend.s_adj_nsx);
 
-		/*
-		 * Get alarm and add to clock alarm list.
-		 */
+	clock_gettimeofday_set_commpage(now, clock_calend.bintime.sec, clock_calend.bintime.frac, clock_calend.tick_scale_x, ticks_per_sec);
 
-		LOCK_CLOCK(s);
-		if ((alarm = alrmfree) == 0) {
-			UNLOCK_CLOCK(s);
-			alarm = (alarm_t) zalloc(alarm_zone);
-			if (alarm == 0)
-				return (KERN_RESOURCE_SHORTAGE);
-			LOCK_CLOCK(s);
-		}
-		else
-			alrmfree = alarm->al_next;
+#if DEVELOPMENT || DEBUG
+	struct clock_calend calend_cp = clock_calend;
+#endif
 
-		/*
-		 * Wait for alarm to occur.
-		 */
-		wait_result = assert_wait((event_t)alarm, THREAD_ABORTSAFE);
-		if (wait_result == THREAD_WAITING) {
-			alarm->al_time = *sleep_time;
-			alarm->al_status = ALARM_SLEEP;
-			post_alarm(clock, alarm);
-			UNLOCK_CLOCK(s);
-
-			wait_result = thread_block(THREAD_CONTINUE_NULL);
-
-			/*
-			 * Note if alarm expired normally or whether it
-			 * was aborted. If aborted, delete alarm from
-			 * clock alarm list. Return alarm to free list.
-			 */
-			LOCK_CLOCK(s);
-			if (alarm->al_status != ALARM_DONE) {
-				assert(wait_result != THREAD_AWAKENED);
-				if ((alarm->al_prev)->al_next = alarm->al_next)
-					(alarm->al_next)->al_prev = alarm->al_prev;
-				rvalue = KERN_ABORTED;
-			}
-			*sleep_time = alarm->al_time;
-			alarm->al_status = ALARM_FREE;
-		} else {
-			assert(wait_result == THREAD_INTERRUPTED);
-			assert(alarm->al_status == ALARM_FREE);
-			rvalue = KERN_ABORTED;
-		}
-		alarm->al_next = alrmfree;
-		alrmfree = alarm;
-		UNLOCK_CLOCK(s);
+	clock_unlock();
+	splx(s);
+
+	print_all_clock_variables(__func__, NULL,NULL,NULL,NULL, &calend_cp);
+}
+
+
+#if DEVELOPMENT || DEBUG
+
+/*
+ * print_all_clock_variables_internal:
+ *
+ * Log every field of the clock_calend snapshot pointed to by
+ * clock_calend_cp through os_log, prefixing each line with func.
+ * Returns without logging unless g_should_log_clock_adjustments is set.
+ *
+ * Each bintime member (offset, bintime, boottime, basesleep) is printed
+ * twice: raw (sec / frac) and converted to seconds + microseconds with
+ * bintime2usclock().
+ */
+void print_all_clock_variables_internal(const char* func, struct clock_calend* clock_calend_cp)
+{
+	clock_sec_t     offset_secs;
+	clock_usec_t    offset_microsecs;
+	clock_sec_t     bintime_secs;
+	clock_usec_t    bintime_microsecs;
+	clock_sec_t     bootime_secs;
+	clock_usec_t    bootime_microsecs;
+	
+	if (!g_should_log_clock_adjustments)
+                return;
+
+	/* convert the bintime members to secs/usecs for the human-readable columns */
+	bintime2usclock(&clock_calend_cp->offset, &offset_secs, &offset_microsecs);
+	bintime2usclock(&clock_calend_cp->bintime, &bintime_secs, &bintime_microsecs);
+	bintime2usclock(&clock_calend_cp->boottime, &bootime_secs, &bootime_microsecs);
+
+	/* scale factors and the offset_count base they are applied from */
+	os_log(OS_LOG_DEFAULT, "%s s_scale_ns %llu s_adj_nsx %lld tick_scale_x %llu offset_count %llu\n",
+	       func , clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx,
+	       clock_calend_cp->tick_scale_x, clock_calend_cp->offset_count);
+	os_log(OS_LOG_DEFAULT, "%s offset.sec %ld offset.frac %llu offset_secs %lu offset_microsecs %d\n",
+	       func, clock_calend_cp->offset.sec, clock_calend_cp->offset.frac,
+	       (unsigned long)offset_secs, offset_microsecs);
+	os_log(OS_LOG_DEFAULT, "%s bintime.sec %ld bintime.frac %llu bintime_secs %lu bintime_microsecs %d\n",
+	       func, clock_calend_cp->bintime.sec, clock_calend_cp->bintime.frac,
+	       (unsigned long)bintime_secs, bintime_microsecs);
+	os_log(OS_LOG_DEFAULT, "%s bootime.sec %ld bootime.frac %llu bootime_secs %lu bootime_microsecs %d\n",
+	       func, clock_calend_cp->boottime.sec, clock_calend_cp->boottime.frac,
+	       (unsigned long)bootime_secs, bootime_microsecs);
+
+	clock_sec_t     basesleep_secs;
+        clock_usec_t    basesleep_microsecs;
+	
+	/* basesleep is converted and logged last, in the same raw + converted form */
+	bintime2usclock(&clock_calend_cp->basesleep, &basesleep_secs, &basesleep_microsecs);
+	os_log(OS_LOG_DEFAULT, "%s basesleep.sec %ld basesleep.frac %llu basesleep_secs %lu basesleep_microsecs %d\n",
+	       func, clock_calend_cp->basesleep.sec, clock_calend_cp->basesleep.frac,
+	       (unsigned long)basesleep_secs, basesleep_microsecs);
+
+}
+
+
+void print_all_clock_variables(const char* func, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* clock_calend_cp)
+{
+	if (!g_should_log_clock_adjustments)
+		return;
+
+	struct bintime  bt;
+	clock_sec_t     wall_secs;
+	clock_usec_t    wall_microsecs;
+	uint64_t now;
+	uint64_t delta;
+
+	if (pmu_secs) {
+		os_log(OS_LOG_DEFAULT, "%s PMU %lu s %d u \n", func, (unsigned long)*pmu_secs, *pmu_usec); 
 	}
-	else
-		*sleep_time = clock_time;
+	if (sys_secs) {
+		os_log(OS_LOG_DEFAULT, "%s sys %lu s %d u \n", func, (unsigned long)*sys_secs, *sys_usec);
+	}
+
+	print_all_clock_variables_internal(func, clock_calend_cp);
+
+	now = mach_absolute_time();
+        delta = now - clock_calend_cp->offset_count;
+
+        bt = scale_delta(delta, clock_calend_cp->tick_scale_x, clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx);
+	bintime_add(&bt, &clock_calend_cp->bintime);
+	bintime2usclock(&bt, &wall_secs, &wall_microsecs);
 
-	return (rvalue);
+	os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n",
+	       func, (unsigned long)wall_secs, wall_microsecs, now);
 }
 
-/*
- * CLOCK INTERRUPT SERVICE ROUTINES.
- */
+
+#endif /* DEVELOPMENT || DEBUG */
+
 
 /*
- * Service clock alarm interrupts. Called from machine dependent
- * layer at splclock(). The clock_id argument specifies the clock,
- * and the clock_time argument gives that clock's current time.
+ *	clock_initialize_calendar:
+ *
+ *	Set the calendar and related clocks
+ *	from the platform clock at boot.
+ *
+ *	Also sends host notifications.
  */
 void
-clock_alarm_intr(
-	clock_id_t			clock_id,
-	mach_timespec_t		*clock_time)
+clock_initialize_calendar(void)
 {
-	clock_t				clock;
-	register alarm_t	alrm1;
-	register alarm_t	alrm2;
-	mach_timespec_t		*alarm_time;
-	spl_t				s;
+	clock_sec_t		sys;  // sleepless time since boot in seconds
+	clock_sec_t		secs; // Current UTC time
+	clock_sec_t		utc_offset_secs; // Difference in current UTC time and sleepless time since boot
+	clock_usec_t		microsys;  
+	clock_usec_t		microsecs; 
+	clock_usec_t		utc_offset_microsecs; 
+	spl_t			s;
+	struct bintime 		bt;
+	struct bintime		monotonic_bt;
+	struct latched_time	monotonic_time;
+	uint64_t		monotonic_usec_total;
+	clock_sec_t             sys2, monotonic_sec;
+        clock_usec_t            microsys2, monotonic_usec;
+        size_t                  size;
+
+	//Get PMU time with offset and corresponding sys time
+	PEGetUTCTimeOfDay(&secs, &microsecs);
+	clock_get_system_microtime(&sys, &microsys);
+
+	/*
+	 * If the platform has a monotonic clock, use kern.monotonicclock_usecs
+	 * to estimate the sleep/wake time, otherwise use the PMU and adjustments
+	 * provided through settimeofday to estimate the sleep time.
+	 * NOTE: the latter case relies on the kernel being the only component
+	 * that sets the PMU offset.
+	 */
+	size = sizeof(monotonic_time);
+	if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) {
+		has_monotonic_clock = 0;
+		os_log(OS_LOG_DEFAULT, "%s system does not have monotonic clock.\n", __func__);
+	} else {
+		has_monotonic_clock = 1;
+		monotonic_usec_total = monotonic_time.monotonic_time_usec;
+		absolutetime_to_microtime(monotonic_time.mach_time, &sys2, &microsys2);
+		os_log(OS_LOG_DEFAULT, "%s system has monotonic clock.\n", __func__);
+	}
+
+	s = splclock();
+	clock_lock();
+
+	commpage_disable_timestamp();
 
-	clock = &clock_list[clock_id];
+	utc_offset_secs = secs;
+	utc_offset_microsecs = microsecs;
+
+#if DEVELOPMENT || DEBUG
+	last_utc_sec = secs;
+	last_utc_usec = microsecs;
+	last_sys_sec = sys;
+	last_sys_usec = microsys;
+	if (secs > max_utc_sec)
+		max_utc_sec = secs;
+#endif
 
 	/*
-	 * Update clock alarm list. All alarms that are due are moved
-	 * to the alarmdone list to be serviced by the alarm_thread.
+	 * We normally expect the UTC clock to be always-on and produce
+	 * greater readings than the tick counter.  There may be corner cases
+	 * due to differing clock resolutions (UTC clock is likely lower) and
+	 * errors reading the UTC clock (some implementations return 0
+	 * on error) in which that doesn't hold true.  Bring the UTC measurements
+	 * in-line with the tick counter measurements as a best effort in that case.
 	 */
+	//FIXME if the current time is prior to 1970, secs will be negative
+	if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) {
+		os_log(OS_LOG_DEFAULT, "%s WARNING: PMU offset is less then sys PMU %lu s %d u sys %lu s %d u\n",
+			__func__, (unsigned long) secs, microsecs, (unsigned long)sys, microsys);
+		secs = utc_offset_secs = sys;
+		microsecs = utc_offset_microsecs = microsys;
+	}
 
-	LOCK_CLOCK(s);
-	alrm1 = (alarm_t) &clock->cl_alarm;
-	while (alrm2 = alrm1->al_next) {
-		alarm_time = &alrm2->al_time;
-		if (CMP_MACH_TIMESPEC(alarm_time, clock_time) > 0)
-			break;
+	// PMU time with offset - sys
+	// This macro stores the subtraction result in utc_offset_secs and utc_offset_microsecs
+	TIME_SUB(utc_offset_secs, sys, utc_offset_microsecs, microsys, USEC_PER_SEC);
 
-		/*
-		 * Alarm has expired, so remove it from the
-		 * clock alarm list.
-		 */  
-		if (alrm1->al_next = alrm2->al_next)
-			(alrm1->al_next)->al_prev = alrm1;
+	clock2bintime(&utc_offset_secs, &utc_offset_microsecs, &bt);
 
-		/*
-		 * If a clock_sleep() alarm, wakeup the thread
-		 * which issued the clock_sleep() call.
-		 */
-		if (alrm2->al_status == ALARM_SLEEP) {
-			alrm2->al_next = 0;
-			alrm2->al_status = ALARM_DONE;
-			alrm2->al_time = *clock_time;
-			thread_wakeup((event_t)alrm2);
-		}
+	/*
+	 *	Initialize the boot time based on the platform clock.
+	 */
+	clock_boottime = secs;
+	clock_boottime_usec = microsecs;
+	commpage_update_boottime(clock_boottime * USEC_PER_SEC + clock_boottime_usec);
+
+	nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec);
+	clock_calend.boottime = bt;
+	clock_calend.bintime = bt;
+	clock_calend.offset.sec = 0;
+	clock_calend.offset.frac = 0;
+
+	clock_calend.tick_scale_x = (uint64_t)1 << 63;
+	clock_calend.tick_scale_x /= ticks_per_sec;
+	clock_calend.tick_scale_x *= 2;
+
+	clock_calend.s_scale_ns = NSEC_PER_SEC;
+	clock_calend.s_adj_nsx = 0;
+
+	if (has_monotonic_clock) {
+
+		monotonic_sec = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
+		monotonic_usec = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+
+		// PMU time without offset - sys
+		// This macro stores the subtraction result in monotonic_sec and monotonic_usec
+		TIME_SUB(monotonic_sec, sys2, monotonic_usec, microsys2, USEC_PER_SEC);
+		clock2bintime(&monotonic_sec, &monotonic_usec, &monotonic_bt);
+
+		// set basesleep to the difference: monotonic clock - sys
+		clock_calend.basesleep = monotonic_bt;
+	} else {
+		// set basesleep to the difference: PMU clock - sys
+		clock_calend.basesleep = bt;
+	}
+	commpage_update_mach_continuous_time(mach_absolutetime_asleep);
 
- 		/*
-		 * If a clock_alarm() alarm, place the alarm on
-		 * the alarm done list and schedule the alarm
-		 * delivery mechanism.
-		 */
-		else {
-			assert(alrm2->al_status == ALARM_CLOCK);
-			if (alrm2->al_next = alrmdone)
-				alrmdone->al_prev = alrm2;
-			else
-				thread_call_enter(&alarm_deliver);
-			alrm2->al_prev = (alarm_t) &alrmdone;
-			alrmdone = alrm2;
-			alrm2->al_status = ALARM_DONE;
-			alrm2->al_time = *clock_time;
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp = clock_calend;
+#endif
+
+	clock_unlock();
+	splx(s);
+
+        print_all_clock_variables(__func__, &secs, &microsecs, &sys, &microsys, &clock_calend_cp);
+
+	/*
+	 *	Send host notifications.
+	 */
+	host_notify_calendar_change();
+	
+#if CONFIG_DTRACE
+	clock_track_calend_nowait();
+#endif
+}
+
+
+void
+clock_wakeup_calendar(void)
+{
+	clock_sec_t		sys;
+	clock_sec_t		secs;
+	clock_usec_t		microsys;
+	clock_usec_t		microsecs;
+	spl_t			s;
+	struct bintime		bt, last_sleep_bt;
+	clock_sec_t             basesleep_s, last_sleep_sec;
+	clock_usec_t            basesleep_us, last_sleep_usec;
+	struct latched_time     monotonic_time;
+	uint64_t		monotonic_usec_total;
+	size_t 			size;
+	clock_sec_t secs_copy;
+        clock_usec_t microsecs_copy;
+#if DEVELOPMENT || DEBUG
+	clock_sec_t utc_sec;
+	clock_usec_t utc_usec;
+	PEGetUTCTimeOfDay(&utc_sec, &utc_usec);
+#endif
+
+	/*
+	 * If the platform has a monotonic clock, use that to
+	 * compute the sleep time. The monotonic clock does not have an offset
+	 * that can be modified, so neither kernel nor userspace can change the
+	 * time of this clock; it can only monotonically increase over time.
+	 * During sleep mach_absolute_time does not tick,
+	 * so the sleep time is the difference between the current monotonic time
+	 * less the absolute time and the previous difference stored at wake time.
+	 *
+	 * basesleep = monotonic - sys ---> computed at last wake
+	 * sleep_time = (monotonic - sys) - basesleep
+	 *
+	 * If the platform does not support monotonic time we use the PMU time
+	 * to compute the last sleep.
+	 * The PMU time is the monotonic clock + an offset that can be set
+	 * by kernel.
+	 *
+	 * IMPORTANT:
+	 * We assume that only the kernel is setting the offset of the PMU and that
+	 * it is doing it only through the settimeofday interface.
+	 *
+	 * basesleep is the difference between the PMU time and the mach_absolute_time
+	 * at wake.
+	 * During awake time settimeofday can change the PMU offset by a delta,
+	 * and basesleep is shifted by the same delta applied to the PMU. So the sleep
+	 * time computation becomes:
+	 *
+	 * PMU = monotonic + PMU_offset
+	 * basesleep = PMU - sys ---> computed at last wake
+	 * basesleep += settimeofday_delta
+	 * PMU_offset += settimeofday_delta
+	 * sleep_time = (PMU - sys) - basesleep
+	 */
+	if (has_monotonic_clock) {
+		//Get monotonic time with corresponding sys time
+		size = sizeof(monotonic_time);
+		if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) {
+			panic("%s: could not call kern.monotonicclock_usecs", __func__);
 		}
+		monotonic_usec_total = monotonic_time.monotonic_time_usec;
+		absolutetime_to_microtime(monotonic_time.mach_time, &sys, &microsys);
+
+		secs = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
+		microsecs = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+	} else {
+		//Get PMU time with offset and corresponding sys time
+		PEGetUTCTimeOfDay(&secs, &microsecs);
+		clock_get_system_microtime(&sys, &microsys);
+
 	}
 
+	s = splclock();
+	clock_lock();
+	
+	commpage_disable_timestamp();
+
+	secs_copy = secs;
+	microsecs_copy = microsecs;
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp1 = clock_calend;
+#endif /* DEVELOPMENT || DEBUG */
+
+#if DEVELOPMENT || DEBUG
+	last_utc_sec = secs;
+	last_utc_usec = microsecs;
+	last_sys_sec = sys;
+	last_sys_usec = microsys;
+	if (secs > max_utc_sec)
+		max_utc_sec = secs;
+#endif
 	/*
-	 * Setup the clock dependent layer to deliver another
-	 * interrupt for the next pending alarm.
+	 * We normally expect the UTC clock to be always-on and produce
+	 * greater readings than the tick counter.  There may be corner cases
+	 * due to differing clock resolutions (UTC clock is likely lower) and
+	 * errors reading the UTC clock (some implementations return 0
+	 * on error) in which that doesn't hold true.  Bring the UTC measurements
+	 * in-line with the tick counter measurements as a best effort in that case.
 	 */
-	if (alrm2)
-		(*clock->cl_ops->c_setalrm)(alarm_time);
-	UNLOCK_CLOCK(s);
+	//FIXME if the current time is prior to 1970, secs will be negative
+	if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) {
+		os_log(OS_LOG_DEFAULT, "%s WARNING: %s is less then sys %s %lu s %d u sys %lu s %d u\n",
+			__func__, (has_monotonic_clock)?"monotonic":"PMU", (has_monotonic_clock)?"monotonic":"PMU", (unsigned long)secs, microsecs, (unsigned long)sys, microsys);
+		secs = sys;
+		microsecs = microsys;
+	}
+
+	// PMU or monotonic - sys
+	// This macro stores the subtraction result in secs and microsecs
+	TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC);
+	clock2bintime(&secs, &microsecs, &bt);
+
+	/*
+	 * Safety belt: the UTC clock will likely have a lower resolution than the tick counter.
+	 * It's also possible that the device didn't fully transition to the powered-off state on
+	 * the most recent sleep, so the tick counter may not have reset or may have only briefly
+	 * turned off.  In that case it's possible for the difference between the UTC clock and the
+	 * tick counter to be less than the previously recorded value in clock_calend.basesleep.
+	 * In that case simply record that we slept for 0 ticks.
+	 */ 
+	if ((bt.sec > clock_calend.basesleep.sec) ||
+	    ((bt.sec == clock_calend.basesleep.sec) && (bt.frac > clock_calend.basesleep.frac))) {
+
+		//last_sleep is the difference between current PMU or monotonic - abs and last wake PMU or monotonic - abs
+		last_sleep_bt = bt;
+		bintime_sub(&last_sleep_bt, &clock_calend.basesleep);
+
+		//set basesleep to current PMU or monotonic - abs
+		clock_calend.basesleep = bt;
+		bintime2usclock(&last_sleep_bt, &last_sleep_sec, &last_sleep_usec);
+		bintime2absolutetime(&last_sleep_bt, &mach_absolutetime_last_sleep);
+		mach_absolutetime_asleep += mach_absolutetime_last_sleep;
+
+		bintime_add(&clock_calend.offset, &last_sleep_bt);
+		bintime_add(&clock_calend.bintime, &last_sleep_bt);
+
+	} else{
+		mach_absolutetime_last_sleep = 0;
+		last_sleep_sec = last_sleep_usec = 0;
+		bintime2usclock(&clock_calend.basesleep, &basesleep_s, &basesleep_us);
+		os_log(OS_LOG_DEFAULT, "%s WARNING: basesleep (%lu s %d u)  > %s-sys (%lu s %d u) \n",
+			__func__, (unsigned long) basesleep_s, basesleep_us, (has_monotonic_clock)?"monotonic":"PMU", (unsigned long) secs_copy, microsecs_copy );
+	}
+
+	KERNEL_DEBUG_CONSTANT(
+		  MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE,
+		  (uintptr_t) mach_absolutetime_last_sleep,
+		  (uintptr_t) mach_absolutetime_asleep,
+		  (uintptr_t) (mach_absolutetime_last_sleep >> 32),
+		  (uintptr_t) (mach_absolutetime_asleep >> 32),
+		  0);
+
+	commpage_update_mach_continuous_time(mach_absolutetime_asleep);
+	adjust_cont_time_thread_calls();
+
+#if DEVELOPMENT || DEBUG
+	struct clock_calend clock_calend_cp = clock_calend;
+#endif
+
+	clock_unlock();
+	splx(s);
+
+#if DEVELOPMENT || DEBUG
+	if (g_should_log_clock_adjustments) {
+		os_log(OS_LOG_DEFAULT, "PMU was %lu s %d u\n",(unsigned long) utc_sec, utc_usec);
+		os_log(OS_LOG_DEFAULT, "last sleep was %lu s %d u\n",(unsigned long) last_sleep_sec, last_sleep_usec);
+		print_all_clock_variables("clock_wakeup_calendar:BEFORE",
+	                          &secs_copy, &microsecs_copy, &sys, &microsys, &clock_calend_cp1);
+		print_all_clock_variables("clock_wakeup_calendar:AFTER", NULL, NULL, NULL, NULL, &clock_calend_cp);
+	}
+#endif /* DEVELOPMENT || DEBUG */
+
+	host_notify_calendar_change();
+
+#if CONFIG_DTRACE
+	clock_track_calend_nowait();
+#endif
 }
 
+
 /*
- * ALARM DELIVERY ROUTINES.
+ *	clock_get_boottime_nanotime:
+ *
+ *	Return the boottime, used by sysctl.
  */
+void
+clock_get_boottime_nanotime(
+	clock_sec_t			*secs,
+	clock_nsec_t		*nanosecs)
+{
+	spl_t	s;
 
+	s = splclock();
+	clock_lock();
+
+	*secs = (clock_sec_t)clock_boottime;
+	*nanosecs = (clock_nsec_t)clock_boottime_usec * NSEC_PER_USEC;
+
+	clock_unlock();
+	splx(s);
+}
+
+/*
+ *	clock_get_boottime_microtime:
+ *
+ *	Return the boottime with microsecond resolution.
+ *
+ *	secs		out: clock_boottime
+ *	microsecs	out: clock_boottime_usec
+ *
+ *	Both values are read with the clock lock held at splclock.
+ */
+void
+clock_get_boottime_microtime(
+	clock_sec_t			*secs,
+	clock_usec_t		*microsecs)
+{
+	spl_t	s;
+
+	s = splclock();
+	clock_lock();
+
+	*secs = (clock_sec_t)clock_boottime;
+	/* the value is already in microseconds: cast to the out-parameter's own type */
+	*microsecs = (clock_usec_t)clock_boottime_usec;
+
+	clock_unlock();
+	splx(s);
+}
+
+
+/*
+ *	Wait / delay routines.
+ */
 static void
-clock_alarm_deliver(
-	thread_call_param_t		p0,
-	thread_call_param_t		p1)
-{
-	register alarm_t	alrm;
-	kern_return_t		code;
-	spl_t				s;
-
-	LOCK_CLOCK(s);
-	while (alrm = alrmdone) {
-		if (alrmdone = alrm->al_next)
-			alrmdone->al_prev = (alarm_t) &alrmdone;
-		UNLOCK_CLOCK(s);
-
-		code = (alrm->al_status == ALARM_DONE? KERN_SUCCESS: KERN_ABORTED);
-		if (alrm->al_port != IP_NULL) {
-			/* Deliver message to designated port */
-			if (IP_VALID(alrm->al_port)) {
-				clock_alarm_reply(alrm->al_port, alrm->al_port_type, code,
-								  				alrm->al_type, alrm->al_time);
-			}
+mach_wait_until_continue(
+	__unused void	*parameter,
+	wait_result_t	wresult)
+{
+	thread_syscall_return((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS);
+	/*NOTREACHED*/
+}
 
-			LOCK_CLOCK(s);
-			alrm->al_status = ALARM_FREE;
-			alrm->al_next = alrmfree;
-			alrmfree = alrm;
-		}
-		else
-			panic("clock_alarm_deliver");
-	}
+/*
+ * mach_wait_until_trap: Suspend execution of calling thread until the specified time has passed
+ *
+ * Parameters:    args->deadline          Deadline handed unchanged to
+ *                                        assert_wait_deadline_with_leeway() with a
+ *                                        leeway of 0; presumably an absolute
+ *                                        mach_absolute_time() value rather than a
+ *                                        duration -- TODO confirm
+ *
+ * Returns:       KERN_SUCCESS            Wait result was not THREAD_INTERRUPTED
+ *                KERN_ABORTED            Wait result was THREAD_INTERRUPTED
+ *
+ */
+kern_return_t
+mach_wait_until_trap(
+	struct mach_wait_until_trap_args	*args)
+{
+	uint64_t		deadline = args->deadline;
+	wait_result_t	wresult;
+
+	/* abort-safe wait keyed on this function's own address */
+	wresult = assert_wait_deadline_with_leeway((event_t)mach_wait_until_trap, THREAD_ABORTSAFE,
+						   TIMEOUT_URGENCY_USER_NORMAL, deadline, 0);
+	/*
+	 * Only block if the wait was actually armed.
+	 * NOTE(review): with a continuation supplied, the result is presumably
+	 * delivered by mach_wait_until_continue() rather than by the return
+	 * below -- confirm against thread_block().
+	 */
+	if (wresult == THREAD_WAITING)
+		wresult = thread_block(mach_wait_until_continue);
+
+	return ((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS);
+}
+
+void
+clock_delay_until(
+	uint64_t		deadline)
+{
+	uint64_t		now = mach_absolute_time();
+
+	if (now >= deadline)
+		return;
 
-	UNLOCK_CLOCK(s);
+	_clock_delay_until_deadline(deadline - now, deadline);
 }
 
 /*
- * CLOCK PRIVATE SERVICING SUBROUTINES.
+ * Preserve the original precise interval that the client
+ * requested for comparison to the spin threshold.
  */
+void
+_clock_delay_until_deadline(
+	uint64_t		interval,
+	uint64_t		deadline)
+{
+	_clock_delay_until_deadline_with_leeway(interval, deadline, 0);
+}
 
 /*
- * Flush all pending alarms on a clock. All alarms
- * are activated and timestamped correctly, so any
- * programs waiting on alarms/threads will proceed
- * with accurate information.
+ * Like _clock_delay_until_deadline, but it accepts a
+ * leeway value.
  */
-static
 void
-flush_alarms(
-	clock_t				clock)
+_clock_delay_until_deadline_with_leeway(
+	uint64_t		interval,
+	uint64_t		deadline,
+	uint64_t		leeway)
 {
-	register alarm_t	alrm1, alrm2;
-	spl_t				s;
 
-	/*
-	 * Flush all outstanding alarms.
-	 */
-	LOCK_CLOCK(s);
-	alrm1 = (alarm_t) &clock->cl_alarm;
-	while (alrm2 = alrm1->al_next) {
-		/*
-		 * Remove alarm from the clock alarm list.
-		 */  
-		if (alrm1->al_next = alrm2->al_next)
-			(alrm1->al_next)->al_prev = alrm1;
+	if (interval == 0)
+		return;
 
+	if (	ml_delay_should_spin(interval)	||
+			get_preemption_level() != 0				||
+			ml_get_interrupts_enabled() == FALSE	) {
+		machine_delay_until(interval, deadline);
+	} else {
 		/*
-		 * If a clock_sleep() alarm, wakeup the thread
-		 * which issued the clock_sleep() call.
+		 * For now, assume a leeway request of 0 means the client does not want a leeway
+		 * value. We may want to change this interpretation in the future.
 		 */
-		if (alrm2->al_status == ALARM_SLEEP) {
-			alrm2->al_next = 0;
-			thread_wakeup((event_t)alrm2);
-		}
-		else {
-			/*
-			 * If a clock_alarm() alarm, place the alarm on
-			 * the alarm done list and wakeup the dedicated
-			 * kernel alarm_thread to service the alarm.
-			 */
-			assert(alrm2->al_status == ALARM_CLOCK);
-			if (alrm2->al_next = alrmdone)
-				alrmdone->al_prev = alrm2;
-			else
-				thread_wakeup((event_t)&alrmdone);
-			alrm2->al_prev = (alarm_t) &alrmdone;
-			alrmdone = alrm2;
+
+		if (leeway) {
+			assert_wait_deadline_with_leeway((event_t)clock_delay_until, THREAD_UNINT, TIMEOUT_URGENCY_LEEWAY, deadline, leeway);
+		} else {
+			assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
 		}
+
+		thread_block(THREAD_CONTINUE_NULL);
 	}
-	UNLOCK_CLOCK(s);
 }
 
-/*
- * Post an alarm on a clock's active alarm list. The alarm is
- * inserted in time-order into the clock's active alarm list.
- * Always called from within a LOCK_CLOCK() code section.
- */
-static
 void
-post_alarm(
-	clock_t				clock,
-	alarm_t				alarm)
+delay_for_interval(
+	uint32_t		interval,
+	uint32_t		scale_factor)
 {
-	register alarm_t	alrm1, alrm2;
-	mach_timespec_t		*alarm_time;
-	mach_timespec_t		*queue_time;
+	uint64_t		abstime;
 
-	/*
-	 * Traverse alarm list until queue time is greater
-	 * than alarm time, then insert alarm.
-	 */
-	alarm_time = &alarm->al_time;
-	alrm1 = (alarm_t) &clock->cl_alarm;
-	while (alrm2 = alrm1->al_next) {
-		queue_time = &alrm2->al_time;
-		if (CMP_MACH_TIMESPEC(queue_time, alarm_time) > 0)
-			break;
-		alrm1 = alrm2;
-	}
-	alrm1->al_next = alarm;
-	alarm->al_next = alrm2;
-	alarm->al_prev = alrm1;
-	if (alrm2)
-		alrm2->al_prev  = alarm;
+	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
-	/*
-	 * If the inserted alarm is the 'earliest' alarm,
-	 * reset the device layer alarm time accordingly.
-	 */
-	if (clock->cl_alarm.al_next == alarm)
-		(*clock->cl_ops->c_setalrm)(alarm_time);
+	_clock_delay_until_deadline(abstime, mach_absolute_time() + abstime);
 }
 
-/*
- * Check the validity of 'alarm_time' and 'alarm_type'. If either
- * argument is invalid, return a negative value. If the 'alarm_time'
- * is now, return a 0 value. If the 'alarm_time' is in the future,
- * return a positive value.
- */
-static
-int
-check_time(
-	alarm_type_t		alarm_type,
-	mach_timespec_t		*alarm_time,
-	mach_timespec_t		*clock_time)
+void
+delay_for_interval_with_leeway(
+	uint32_t		interval,
+	uint32_t		leeway,
+	uint32_t		scale_factor)
 {
-	int					result;
+	uint64_t		abstime_interval;
+	uint64_t		abstime_leeway;
 
-	if (BAD_ALRMTYPE(alarm_type))
-		return (-1);
-	if (BAD_MACH_TIMESPEC(alarm_time))
-		return (-1);
-	if ((alarm_type & ALRMTYPE) == TIME_RELATIVE)
-		ADD_MACH_TIMESPEC(alarm_time, clock_time);
+	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime_interval);
+	clock_interval_to_absolutetime_interval(leeway, scale_factor, &abstime_leeway);
 
-	result = CMP_MACH_TIMESPEC(alarm_time, clock_time);
+	_clock_delay_until_deadline_with_leeway(abstime_interval, mach_absolute_time() + abstime_interval, abstime_leeway);
+}
 
-	return ((result >= 0)? result: 0);
+/*
+ *	delay:
+ *
+ *	Delay for |usec| microseconds by calling
+ *	delay_for_interval() with a scale factor of NSEC_PER_USEC.
+ *	A negative argument is treated as its magnitude.
+ */
+void
+delay(
+	int		usec)
+{
+	/*
+	 * Negate in unsigned arithmetic: -usec on a signed int is
+	 * undefined for INT_MIN, whereas 0U - (uint32_t)usec is well
+	 * defined and yields the same magnitude for every other value.
+	 */
+	uint32_t	interval = (usec < 0)? (0U - (uint32_t)usec): (uint32_t)usec;
+
+	delay_for_interval(interval, NSEC_PER_USEC);
 }
 
-mach_timespec_t
-clock_get_system_value(void)
+/*
+ *	Miscellaneous routines.
+ */
+void
+clock_interval_to_deadline(
+	uint32_t			interval,
+	uint32_t			scale_factor,
+	uint64_t			*result)
 {
-	clock_t				clock = &clock_list[SYSTEM_CLOCK];
-	mach_timespec_t		value;
+	uint64_t	abstime;
 
-	(void) (*clock->cl_ops->c_gettime)(&value);
+	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
-	return value;
+	*result = mach_absolute_time() + abstime;
 }
 
-mach_timespec_t
-clock_get_calendar_value(void)
+void
+clock_absolutetime_interval_to_deadline(
+	uint64_t			abstime,
+	uint64_t			*result)
 {
-	clock_t				clock = &clock_list[CALENDAR_CLOCK];
-	mach_timespec_t		value = MACH_TIMESPEC_ZERO;
+	*result = mach_absolute_time() + abstime;
+}
 
-	(void) (*clock->cl_ops->c_gettime)(&value);
+/*
+ *	clock_continuoustime_interval_to_deadline:
+ *
+ *	Store in *result the current mach_continuous_time()
+ *	plus the interval conttime.
+ */
+void
+clock_continuoustime_interval_to_deadline(
+	uint64_t			conttime,
+	uint64_t			*result)
+{
+	uint64_t	base = mach_continuous_time();
+
+	*result = base + conttime;
+}
 
-	return value;
+/*
+ *	clock_get_uptime:
+ *
+ *	Store the current mach_absolute_time() in *result.
+ */
+void
+clock_get_uptime(
+	uint64_t	*result)
+{
+	uint64_t	abstime = mach_absolute_time();
+
+	*result = abstime;
 }
 
 void
@@ -836,120 +1597,144 @@ clock_deadline_for_periodic_event(
 	}
 }
 
-void
-mk_timebase_info(
-	uint32_t			*delta,
-	uint32_t			*abs_to_ns_numer,
-	uint32_t			*abs_to_ns_denom,
-	uint32_t			*proc_to_abs_numer,
-	uint32_t			*proc_to_abs_denom)
+uint64_t	/* Returns mach_absolute_time() plus mach_absolutetime_asleep, retrying if the offset changed around the time read. */
+mach_continuous_time(void)
 {
-	mach_timebase_info_data_t	info;
-	uint32_t					one = 1;
-
-	clock_timebase_info(&info);
-
-	copyout((void *)&one, (void *)delta, sizeof (uint32_t));
-
-	copyout((void *)&info.numer, (void *)abs_to_ns_numer, sizeof (uint32_t));
-	copyout((void *)&info.denom, (void *)abs_to_ns_denom, sizeof (uint32_t));
-
-	copyout((void *)&one, (void *)proc_to_abs_numer, sizeof (uint32_t));
-	copyout((void *)&one, (void *)proc_to_abs_denom, sizeof (uint32_t));
+	while(1) {	/* loop until both reads of mach_absolutetime_asleep agree */
+		uint64_t read1 = mach_absolutetime_asleep;
+		uint64_t absolute = mach_absolute_time();
+		OSMemoryBarrier();
+		uint64_t read2 = mach_absolutetime_asleep;	/* re-read the offset after the barrier */
+
+		if(__builtin_expect(read1 == read2, 1)) {	/* hinted to the compiler as the common case */
+			return absolute + read1;
+		}
+	}
 }
 
-kern_return_t
-mach_timebase_info(
-	mach_timebase_info_t	out_info)
+uint64_t	/* Returns mach_approximate_time() plus mach_absolutetime_asleep, retrying if the offset changed around the time read. */
+mach_continuous_approximate_time(void)
 {
-	mach_timebase_info_data_t	info;
-
-	clock_timebase_info(&info);
-
-	copyout((void *)&info, (void *)out_info, sizeof (info));
-
-	return (KERN_SUCCESS);
+	while(1) {	/* loop until both reads of mach_absolutetime_asleep agree */
+		uint64_t read1 = mach_absolutetime_asleep;
+		uint64_t absolute = mach_approximate_time();
+		OSMemoryBarrier();
+		uint64_t read2 = mach_absolutetime_asleep;	/* re-read the offset after the barrier */
+
+		if(__builtin_expect(read1 == read2, 1)) {	/* hinted to the compiler as the common case */
+			return absolute + read1;
+		}
+	}
 }
 
-kern_return_t
-mach_wait_until(
-	uint64_t		deadline)
-{
-	int				wait_result;
-
-	wait_result = assert_wait((event_t)&mach_wait_until, THREAD_ABORTSAFE);
-	if (wait_result == THREAD_WAITING) {
-		thread_set_timer_deadline(deadline);
-		wait_result = thread_block(THREAD_CONTINUE_NULL);
-		if (wait_result != THREAD_TIMED_OUT)
-			thread_cancel_timer();
-	}
+/*
+ * continuoustime_to_absolutetime
+ * Must be called with interrupts disabled
+ * Returned value is only valid until the next update to
+ * mach_absolutetime_asleep, the offset this function subtracts
+ */
+uint64_t
+continuoustime_to_absolutetime(uint64_t conttime) {
+	if (conttime <= mach_absolutetime_asleep)
+		return 0;	/* conttime is not past the asleep offset: return 0 rather than wrap below zero */
+	else
+		return conttime - mach_absolutetime_asleep;
+}
 
-	return ((wait_result == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS);
+/*
+ * absolutetime_to_continuoustime
+ * Must be called with interrupts disabled
+ * Returned value is only valid until the next update to
+ * mach_absolutetime_asleep, the offset this function adds
+ */
+uint64_t
+absolutetime_to_continuoustime(uint64_t abstime) {
+	return abstime + mach_absolutetime_asleep;
 }
 
+#if	CONFIG_DTRACE
+
+/*
+ * clock_get_calendar_nanotime_nowait
+ *
+ * Description:	Non-blocking version of clock_get_calendar_nanotime()
+ *
+ * Notes:	This function operates by separately tracking calendar time
+ *		updates using a two element structure to copy the calendar
+ *		state, which may be asynchronously modified.  It utilizes
+ *		barrier instructions in the tracking process and in the local
+ *		stable snapshot process in order to ensure that a consistent
+ *		snapshot is used to perform the calculation.
+ */
 void
-clock_adjtime(
-	int32_t		*secs,
-	int32_t		*microsecs)
+clock_get_calendar_nanotime_nowait(
+	clock_sec_t			*secs,
+	clock_nsec_t		*nanosecs)
 {
-	uint32_t	interval;
-	spl_t		s;
+	int i = 0;
+	uint64_t		now;
+	struct unlocked_clock_calend stable;
+	struct bintime bt;
 
-	s = splclock();
-	simple_lock(&calend_adjlock);
+	for (;;) {
+		stable = flipflop[i];		/* take snapshot */
+
+		/*
+		 * Use a barrier instruction to ensure atomicity.  We AND
+		 * off the "in progress" bit of the snapshot's copy to get
+		 * the current generation count.
+		 */
+		(void)hw_atomic_and(&stable.gen, ~(uint32_t)1);
 
-	interval = clock_set_calendar_adjtime(secs, microsecs);
-	if (interval != 0) {
-		if (calend_adjdeadline >= interval)
-			calend_adjdeadline -= interval;
-		clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-												&calend_adjdeadline);
+		/*
+		 * If an update _is_ in progress, the generation count will be
+		 * off by one, if it _was_ in progress, it will be off by two,
+		 * and if we caught it at a good time, it will be equal (and
+		 * our snapshot is therefore stable).
+		 */
+		if (flipflop[i].gen == stable.gen)
+			break;
 
-		timer_call_enter(&calend_adjcall, calend_adjdeadline);
+		/* Switch to the other element of the flipflop, and try again. */
+		i ^= 1;
 	}
-	else
-		timer_call_cancel(&calend_adjcall);
 
-	simple_unlock(&calend_adjlock);
-	splx(s);
+	now = mach_absolute_time();
+
+	bt = get_scaled_time(now);
+
+	bintime_add(&bt, &clock_calend.bintime);	/* NOTE(review): reads the live clock_calend, not the stable snapshot taken above (stable.calend is never used) -- confirm this is intended */
+
+	bintime2nsclock(&bt, secs, nanosecs);
 }
 
-static void
-calend_adjust_call(
-	timer_call_param_t		p0,
-	timer_call_param_t		p1)
+static void 
+clock_track_calend_nowait(void)	/* copies clock_calend into both flipflop[] elements, bracketing each copy with generation-count updates */
 {
-	uint32_t	interval;
-	spl_t		s;
-
-	s = splclock();
-	simple_lock(&calend_adjlock);
+	int i;
 
-	interval = clock_adjust_calendar();
-	if (interval != 0) {
-		clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-								  				&calend_adjdeadline);
+	for (i = 0; i < 2; i++) {
+		struct clock_calend tmp = clock_calend;	/* local copy, taken afresh on each iteration */
 
-		timer_call_enter(&calend_adjcall, calend_adjdeadline);
-	}
+		/*
+		 * Set the low bit of the generation count; since we use a
+		 * barrier instruction to do this, we are guaranteed that this
+		 * will flag an update in progress to an async caller trying
+		 * to examine the contents.
+		 */
+		(void)hw_atomic_or(&flipflop[i].gen, 1);
 
-	simple_unlock(&calend_adjlock);
-	splx(s);
-}
+		flipflop[i].calend = tmp;
 
-void
-clock_wakeup_calendar(void)
-{
-	thread_call_enter(&calend_wakecall);
+		/*
+		 * Increment the generation count to clear the low bit to
+		 * signal completion.  If a caller compares the generation
+		 * count after taking a copy while in progress, the count
+		 * will be off by two.
+		 */
+		(void)hw_atomic_add(&flipflop[i].gen, 1);
+	}
 }
 
-static void
-calend_dowakeup(
-	thread_call_param_t		p0,
-	thread_call_param_t		p1)
-{
-	void		IOKitResetTime(void);
+#endif	/* CONFIG_DTRACE */
 
-	IOKitResetTime();
-}