]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/clock.c
xnu-4570.71.2.tar.gz
[apple/xnu.git] / osfmk / kern / clock.c
index 79b348c776daa7e4814a862b7cac4d48de9af2bb..9bd9f3b0e8249889767ff787b4c489306b4123dd 100644 (file)
@@ -1,16 +1,19 @@
 /*
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
 /*
  */
 /*
  * @OSF_COPYRIGHT@
  */
 /*
- *     File:           kern/clock.c
- *     Purpose:        Routines for the creation and use of kernel
- *                     alarm clock services. This file and the ipc
- *                     routines in kern/ipc_clock.c constitute the
- *                     machine-independent clock service layer.
+ */
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)time.h      8.5 (Berkeley) 5/4/95
+ * $FreeBSD$
  */
 
  */
 
-#include <cpus.h>
-#include <mach_host.h>
-
-#include <mach/boolean.h>
-#include <mach/processor_info.h>
-#include <mach/vm_param.h>
-#include <machine/mach_param.h>
-#include <kern/cpu_number.h>
-#include <kern/misc_protos.h>
-#include <kern/lock.h>
-#include <kern/host.h>
+#include <mach/mach_types.h>
+
 #include <kern/spl.h>
 #include <kern/sched_prim.h>
 #include <kern/thread.h>
 #include <kern/spl.h>
 #include <kern/sched_prim.h>
 #include <kern/thread.h>
-#include <kern/thread_swap.h>
-#include <kern/ipc_host.h>
 #include <kern/clock.h>
 #include <kern/clock.h>
-#include <kern/zalloc.h>
-#include <ipc/ipc_port.h>
+#include <kern/host_notify.h>
+#include <kern/thread_call.h>
+#include <libkern/OSAtomic.h>
+
+#include <IOKit/IOPlatformExpert.h>
 
 
-#include <mach/mach_syscalls.h>
-#include <mach/clock_reply.h>
+#include <machine/commpage.h>
+#include <machine/config.h>
+#include <machine/machine_routines.h>
+
+#include <mach/mach_traps.h>
 #include <mach/mach_time.h>
 
 #include <mach/mach_time.h>
 
-/*
- * Exported interface
- */
+#include <sys/kdebug.h>
+#include <sys/timex.h>
+#include <kern/arithmetic_128.h>
+#include <os/log.h>
+
+uint32_t       hz_tick_interval = 1;   /* replaced in clock_timebase_init() by NSEC_PER_SEC / 100 converted to absolute-time units */
+static uint64_t has_monotonic_clock = 0;       /* when nonzero, update_basesleep() leaves clock_calend.basesleep untouched */
+
+decl_simple_lock_data(,clock_lock)     /* lock taken and released through the clock_lock()/clock_unlock() macros */
+lck_grp_attr_t * settime_lock_grp_attr;        /* the four settime_* objects are set up in clock_config() */
+lck_grp_t * settime_lock_grp;
+lck_attr_t * settime_lock_attr;
+lck_mtx_t settime_lock;
+
+#define clock_lock()   \
+       simple_lock(&clock_lock)
+
+#define clock_unlock() \
+       simple_unlock(&clock_lock)
+
+#define clock_lock_init()      \
+       simple_lock_init(&clock_lock, 0)
+
+#ifdef kdp_simple_lock_is_acquired     /* only built when this macro is defined */
+boolean_t kdp_clock_is_locked()        /* reports whether clock_lock is currently acquired */
+{
+       return kdp_simple_lock_is_acquired(&clock_lock);        /* result is passed through unchanged */
+}
+#endif
+
+struct bintime {       /* fixed-point time value: whole seconds plus a binary fraction */
+       time_t  sec;    /* whole seconds */
+       uint64_t frac;  /* fraction of a second in units of 2^-64 s */
+};
+
+static __inline void   /* bintime_addx: add the 64-bit fraction _x to _bt,
+                        * carrying one second into _bt->sec when the fraction wraps. */
+bintime_addx(struct bintime *_bt, uint64_t _x)
+{
+       uint64_t _u;
+
+       _u = _bt->frac;         /* fraction before the add */
+       _bt->frac += _x;
+       if (_u > _bt->frac)     /* sum is smaller than the old value: the unsigned add wrapped */
+               _bt->sec++;
+}
+
+static __inline void   /* bintime_subx: subtract the 64-bit fraction _x from _bt,
+                        * borrowing one second from _bt->sec when the fraction wraps. */
+bintime_subx(struct bintime *_bt, uint64_t _x)
+{
+       uint64_t _u;
+
+       _u = _bt->frac;         /* fraction before the subtract */
+       _bt->frac -= _x;
+       if (_u < _bt->frac)     /* difference is larger than the old value: the unsigned subtract wrapped */
+               _bt->sec--;
+}
+
+static __inline void   /* bintime_addns: add ns nanoseconds to bt. */
+bintime_addns(struct bintime *bt, uint64_t ns)
+{
+       bt->sec += ns/ (uint64_t)NSEC_PER_SEC;  /* whole seconds go straight into sec */
+       ns = ns % (uint64_t)NSEC_PER_SEC;       /* sub-second remainder, below NSEC_PER_SEC */
+       if (ns) {
+               /* 18446744073 = int(2^64 / NSEC_PER_SEC) */
+               ns = ns * (uint64_t)18446744073LL;      /* remainder as a 2^-64 s fraction; the truncated constant rounds it down */
+               bintime_addx(bt, ns);   /* carries into sec if the fraction wraps */
+       }
+}
 
 
-#include <mach/clock_server.h>
-#include <mach/mach_host_server.h>
+static __inline void   /* bintime_subns: subtract ns nanoseconds from bt. */
+bintime_subns(struct bintime *bt, uint64_t ns)
+{
+       bt->sec -= ns/ (uint64_t)NSEC_PER_SEC;  /* whole seconds come straight off sec */
+       ns = ns % (uint64_t)NSEC_PER_SEC;       /* sub-second remainder, below NSEC_PER_SEC */
+       if (ns) {
+               /* 18446744073 = int(2^64 / NSEC_PER_SEC) */
+               ns = ns * (uint64_t)18446744073LL;      /* remainder as a 2^-64 s fraction; the truncated constant rounds it down */
+               bintime_subx(bt, ns);   /* borrows from sec if the fraction wraps */
+       }
+}
 
 
-/* local data declarations */
-decl_simple_lock_data(static,ClockLock)                /* clock system synchronization */
-static struct  zone            *alarm_zone;    /* zone for user alarms */
-static struct  alarm           *alrmfree;              /* alarm free list pointer */
-static struct  alarm           *alrmdone;              /* alarm done list pointer */
-static long                                    alrm_seqno;             /* uniquely identifies alarms */
-static thread_call_data_t      alarm_deliver;
+static __inline void   /* bintime_addxns: apply a * xns to bt, adding when xns > 0 and
+                        * subtracting otherwise. The only caller visible here (scale_delta)
+                        * passes xns as a signed 64-bit binary fraction of a nanosecond. */
+bintime_addxns(struct bintime *bt, uint64_t a, int64_t xns)
+{
+       uint64_t uxns = (xns > 0)?(uint64_t )xns:(uint64_t)-xns;        /* magnitude of xns; NOTE(review): -xns is evaluated as int64_t before the cast, so INT64_MIN would overflow */
+       uint64_t ns = multi_overflow(a, uxns);  /* presumably the part of a * uxns that does not fit in 64 bits, i.e. whole ns - confirm against multi_overflow's definition */
+       if (xns > 0) {
+               if (ns)
+                       bintime_addns(bt, ns);  /* the overflowed part is treated as whole nanoseconds */
+               ns = (a * uxns) / (uint64_t)NSEC_PER_SEC;       /* low 64 bits of the product (fraction of a ns) rescaled to a fraction of a second */
+               bintime_addx(bt, ns);
+       }
+       else{
+               /* xns == 0 also lands here; uxns is then 0 and bintime_subx subtracts 0 */
+               if (ns)
+                       bintime_subns(bt, ns);
+               ns = (a * uxns) / (uint64_t)NSEC_PER_SEC;
+               bintime_subx(bt,ns);
+       }
+}
+
+
+static __inline void
+bintime_add(struct bintime *_bt, const struct bintime *_bt2)
+{
+       uint64_t _u;
 
 
-decl_simple_lock_data(static,calend_adjlock)
+       _u = _bt->frac;
+       _bt->frac += _bt2->frac;
+       if (_u > _bt->frac)
+               _bt->sec++;
+       _bt->sec += _bt2->sec;
+}
+
+static __inline void
+bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
+{
+       uint64_t _u;
 
 
-static timer_call_data_t       calend_adjcall;
-static uint64_t                                calend_adjinterval, calend_adjdeadline;
+       _u = _bt->frac;
+       _bt->frac -= _bt2->frac;
+       if (_u < _bt->frac)
+               _bt->sec--;
+       _bt->sec -= _bt2->sec;
+}
 
 
-static thread_call_data_t      calend_wakecall;
+static __inline void
+clock2bintime(const clock_sec_t *secs, const clock_usec_t *microsecs, struct bintime *_bt)
+{
 
 
-/* backwards compatibility */
-int             hz = HZ;                /* GET RID OF THIS !!! */
-int             tick = (1000000 / HZ);  /* GET RID OF THIS !!! */
+       _bt->sec = *secs;
+       /* 18446744073709 = int(2^64 / 1000000) */
+       _bt->frac = *microsecs * (uint64_t)18446744073709LL;
+}
 
 
-/* external declarations */
-extern struct clock    clock_list[];
-extern int             clock_count;
+static __inline void
+bintime2usclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *microsecs)
+{
 
 
-/* local clock subroutines */
-static
-void   flush_alarms(
-                       clock_t                 clock);
+       *secs = _bt->sec;
+       *microsecs = ((uint64_t)USEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;
+}
 
 
-static
-void   post_alarm(
-                       clock_t                 clock,
-                       alarm_t                 alarm);
+static __inline void
+bintime2nsclock(const struct bintime *_bt, clock_sec_t *secs, clock_usec_t *nanosecs)
+{
 
 
-static
-int            check_time(
-                       alarm_type_t    alarm_type,
-                       mach_timespec_t *alarm_time,
-                       mach_timespec_t *clock_time);
+       *secs = _bt->sec;
+       *nanosecs = ((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;
+}
 
 
-static
-void   clock_alarm_deliver(
-                       thread_call_param_t             p0,
-                       thread_call_param_t             p1);
+static __inline void   /* bintime2absolutetime: convert *_bt to nanoseconds and then,
+                        * through nanoseconds_to_absolutetime(), store the result in *abs. */
+bintime2absolutetime(const struct bintime *_bt, uint64_t *abs)
+{
+       uint64_t nsec;
+       /* seconds scaled to ns, plus the fraction scaled to ns; only the upper 32 bits of frac are used */
+       nsec = (uint64_t) _bt->sec * (uint64_t)NSEC_PER_SEC + (((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32);
+       nanoseconds_to_absolutetime(nsec, abs);
+}
 
 
-static
-void   calend_adjust_call(
-                       timer_call_param_t      p0,
-                       timer_call_param_t      p1);
+struct latched_time {  /* pair of time readings; no user of this struct is visible in this part of the file */
+        uint64_t monotonic_time_usec;  /* presumably a monotonic-clock reading in microseconds - confirm against users */
+        uint64_t mach_time;            /* presumably the mach absolute time captured with it - confirm against users */
+};
 
 
-static
-void   calend_dowakeup(
-                       thread_call_param_t             p0,
-                       thread_call_param_t             p1);
+extern int
+kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
 /*
 
 /*
- *     Macros to lock/unlock clock system.
+ *     Time of day (calendar) variables.
+ *
+ *     Algorithm:
+ *
+ *     TOD <- bintime + delta*scale
+ *
+ *     where :
+ *     bintime is a cumulative offset that includes boottime and scaled time elapsed between boottime and last scale update.
+ *     delta is ticks elapsed since last scale update.
+ *     scale is computed according to an adjustment provided by ntp_kern.
  */
  */
-#define LOCK_CLOCK(s)                  \
-       s = splclock();                 \
-       simple_lock(&ClockLock);
+static struct clock_calend {
+       uint64_t                s_scale_ns; /* scale to apply for each second elapsed; converts seconds to ns */
+       int64_t                 s_adj_nsx; /* additional adj to apply for each second elapsed, expressed as a 64 bit frac of a ns */
+       uint64_t                tick_scale_x; /* scale to apply for each tick elapsed; converts ticks to a 64 bit frac of a second */
+       uint64_t                offset_count; /* abs time from which the current scales apply */
+       struct bintime          offset; /* cumulative offset expressed in (sec, 64 bits frac of a second) */
+       struct bintime          bintime; /* cumulative offset (it includes boottime) expressed in (sec, 64 bits frac of a second) */
+       struct bintime          boottime; /* boot time expressed in (sec, 64 bits frac of a second) */
+       struct bintime          basesleep; /* shifted by update_basesleep() only when has_monotonic_clock is 0; presumably the base for sleep-time computation - confirm against users */
+} clock_calend;
+
+static uint64_t ticks_per_sec; /* ticks in a second (expressed in abs time) */
+
+#if DEVELOPMENT || DEBUG
+clock_sec_t last_utc_sec = 0;
+clock_usec_t last_utc_usec = 0;
+clock_sec_t max_utc_sec = 0;
+clock_sec_t last_sys_sec = 0;
+clock_usec_t last_sys_usec = 0;
+#endif
+
+#if DEVELOPMENT || DEBUG
+extern int g_should_log_clock_adjustments;
+
+static void print_all_clock_variables(const char*, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* calend_cp);
+static void print_all_clock_variables_internal(const char *, struct clock_calend* calend_cp);
+#else
+#define print_all_clock_variables(...) do { } while (0)
+#define print_all_clock_variables_internal(...) do { } while (0)
+#endif
+
+#if    CONFIG_DTRACE
 
 
-#define UNLOCK_CLOCK(s)                        \
-       simple_unlock(&ClockLock);      \
-       splx(s);
 
 /*
 
 /*
- * Configure the clock system. (Not sure if we need this,
- * as separate from clock_init()).
+ *     Unlocked calendar flipflop; this is used to track a clock_calend such
+ *     that we can safely access a snapshot of a valid  clock_calend structure
+ *     without needing to take any locks to do it.
+ *
+ *     The trick is to use a generation count and set the low bit when it is
+ *     being updated/read; by doing this, we guarantee, through use of the
+ *     hw_atomic functions, that the generation is incremented when the bit
+ *     is cleared atomically (by using a 1 bit add).
+ */
+static struct unlocked_clock_calend {
+       struct clock_calend     calend;         /* copy of calendar */
+       uint32_t                gen;            /* generation count */
+} flipflop[ 2];        /* two snapshot slots, each with its own generation count */
+
+static void clock_track_calend_nowait(void);
+
+#endif
+
+void _clock_delay_until_deadline(uint64_t interval, uint64_t deadline);
+void _clock_delay_until_deadline_with_leeway(uint64_t interval, uint64_t deadline, uint64_t leeway);
+
+/* Boottime variables*/
+static uint64_t clock_boottime;
+static uint32_t clock_boottime_usec;
+
+#define TIME_ADD(rsecs, secs, rfrac, frac, unit)       /* (rsecs, rfrac) += (secs, frac), with at most one carry; rfrac and unit are evaluated more than once */ \
+MACRO_BEGIN                                                                                    \
+       if (((rfrac) += (frac)) >= (unit)) {    /* fraction reached one whole unit */           \
+               (rfrac) -= (unit);                                                      \
+               (rsecs) += 1;                                                           \
+       }                                                                                               \
+       (rsecs) += (secs);                                                              \
+MACRO_END
+
+#define TIME_SUB(rsecs, secs, rfrac, frac, unit)       /* (rsecs, rfrac) -= (secs, frac), with at most one borrow; rfrac and unit are evaluated more than once */ \
+MACRO_BEGIN                                                                                    \
+       if ((int)((rfrac) -= (frac)) < 0) {     /* difference viewed as int is negative: borrow */      \
+               (rfrac) += (unit);                                                      \
+               (rsecs) -= 1;                                                           \
+       }                                                                                               \
+       (rsecs) -= (secs);                                                              \
+MACRO_END
+
+/*
+ *     clock_config:
+ *
+ *     Called once at boot to configure the clock subsystem.
  */
 void
 clock_config(void)
 {
  */
 void
 clock_config(void)
 {
-       clock_t                 clock;
-       register int    i;
-
-       if (cpu_number() != master_cpu)
-               panic("clock_config");
 
 
-       simple_lock_init(&ClockLock, ETAP_MISC_CLOCK);
-       thread_call_setup(&alarm_deliver, clock_alarm_deliver, NULL);
+       clock_lock_init();
 
 
-       simple_lock_init(&calend_adjlock, ETAP_MISC_CLOCK);
-       timer_call_setup(&calend_adjcall, calend_adjust_call, NULL);
+       settime_lock_grp_attr = lck_grp_attr_alloc_init();
+       settime_lock_grp = lck_grp_alloc_init("settime grp", settime_lock_grp_attr);
+       settime_lock_attr = lck_attr_alloc_init();
+       lck_mtx_init(&settime_lock, settime_lock_grp, settime_lock_attr);
 
 
-       thread_call_setup(&calend_wakecall, calend_dowakeup, NULL);
+       clock_oldconfig();
 
 
-       /*
-        * Configure clock devices.
-        */
-       for (i = 0; i < clock_count; i++) {
-               clock = &clock_list[i];
-               if (clock->cl_ops) {
-                       if ((*clock->cl_ops->c_config)() == 0)
-                               clock->cl_ops = 0;
-               }
-       }
+       ntp_init();
 
 
-       /* start alarm sequence numbers at 0 */
-       alrm_seqno = 0;
+       nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec);
 }
 
 /*
 }
 
 /*
- * Initialize the clock system.
+ *     clock_init:
+ *
+ *     Called on a processor each time it is started.
  */
 void
 clock_init(void)
 {
  */
 void
 clock_init(void)
 {
-       clock_t                 clock;
-       register int    i;
-
-       /*
-        * Initialize basic clock structures.
-        */
-       for (i = 0; i < clock_count; i++) {
-               clock = &clock_list[i];
-               if (clock->cl_ops)
-                       (*clock->cl_ops->c_init)();
-       }
+       clock_oldinit();
 }
 
 /*
 }
 
 /*
- * Called by machine dependent code
- * to initialize areas dependent on the
- * timebase value.  May be called multiple
- * times during start up.
+ *     clock_timebase_init:
+ *
+ *     Called by machine dependent code
+ *     to initialize areas dependent on the
+ *     timebase value.  May be called multiple
+ *     times during start up.
  */
 void
 clock_timebase_init(void)
 {
  */
 void
 clock_timebase_init(void)
 {
+       uint64_t        abstime;
+
+       nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &abstime);
+       hz_tick_interval = (uint32_t)abstime;
+
        sched_timebase_init();
 }
 
 /*
        sched_timebase_init();
 }
 
 /*
- * Initialize the clock ipc service facility.
+ *     mach_timebase_info_trap:
+ *
+ *     User trap returns timebase constant.
+ */
+kern_return_t
+mach_timebase_info_trap(
+       struct mach_timebase_info_trap_args *args)
+{
+       mach_vm_address_t                       out_info_addr = args->info;     /* destination address taken from the trap arguments */
+       mach_timebase_info_data_t       info = {};      /* starts zeroed before being filled in */
+
+       clock_timebase_info(&info);
+
+       /* NOTE(review): the copyout() result is discarded, so a failed copy is not reported to the caller */
+       copyout((void *)&info, out_info_addr, sizeof (info));
+
+       return (KERN_SUCCESS);  /* returned unconditionally */
+}
+
+/*
+ *     Calendar routines.
+ */
+
+/*
+ *     clock_get_calendar_microtime:
+ *
+ *     Returns the current calendar value,
+ *     microseconds as the fraction.
  */
 void
  */
 void
-clock_service_create(void)
+clock_get_calendar_microtime(
+       clock_sec_t             *secs,
+       clock_usec_t            *microsecs)
 {
 {
-       clock_t                 clock;
-       register int    i;
+       clock_get_calendar_absolute_and_microtime(secs, microsecs, NULL);
+}
+
+/*
+ * get_scale_factors_from_adj:
+ *
+ * computes scale factors from the value given in adjustment.
+ *
+ * Part of the code has been taken from tc_windup of FreeBSD
+ * written by Poul-Henning Kamp <phk@FreeBSD.ORG>, Julien Ridoux and
+ * Konstantin Belousov.
+ * https://github.com/freebsd/freebsd/blob/master/sys/kern/kern_tc.c
+ */
+static void
+get_scale_factors_from_adj(int64_t adjustment, uint64_t* tick_scale_x, uint64_t* s_scale_ns, int64_t* s_adj_nsx)
+{
+       uint64_t scale;
+       int64_t nano, frac;
+
+       /*-
+        * Calculating the scaling factor.  We want the number of 1/2^64
+        * fractions of a second per period of the hardware counter, taking
+        * into account the th_adjustment factor which the NTP PLL/adjtime(2)
+        * processing provides us with.
+        *
+        * The th_adjustment is nanoseconds per second with 32 bit binary
+        * fraction and we want 64 bit binary fraction of second:
+        *
+        *       x = a * 2^32 / 10^9 = a * 4.294967296
+        *
+        * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
+        * we can only multiply by about 850 without overflowing, that
+        * leaves no suitably precise fractions for multiply before divide.
+        *
+        * Divide before multiply with a fraction of 2199/512 results in a
+        * systematic undercompensation of 10PPM of th_adjustment.  On a
+        * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
+        *
+        * We happily sacrifice the lowest of the 64 bits of our result
+        * to the goddess of code clarity.
+        *
+        */
+       scale = (uint64_t)1 << 63;
+       scale += (adjustment / 1024) * 2199;
+       scale /= ticks_per_sec;
+       *tick_scale_x = scale * 2;
 
        /*
 
        /*
-        * Initialize ipc clock services.
+        * hi part of adj
+        * it contains ns (without fraction) to add to the next sec.
+        * Get ns scale factor for the next sec.
         */
         */
-       for (i = 0; i < clock_count; i++) {
-               clock = &clock_list[i];
-               if (clock->cl_ops) {
-                       ipc_clock_init(clock);
-                       ipc_clock_enable(clock);
-               }
-       }
+       nano = (adjustment > 0)? adjustment >> 32 : -((-adjustment) >> 32);
+       scale = (uint64_t) NSEC_PER_SEC;
+       scale += nano;
+       *s_scale_ns = scale;
 
        /*
 
        /*
-        * Perform miscellaneous late
-        * initialization.
+        * lo part of adj
+        * it contains 32 bit frac of ns to add to the next sec.
+        * Keep it as additional adjustment for the next sec.
         */
         */
-       i = sizeof(struct alarm);
-       alarm_zone = zinit(i, (4096/i)*i, 10*i, "alarms");
+       frac = (adjustment > 0)? ((uint32_t) adjustment) : -((uint32_t) (-adjustment));
+       *s_adj_nsx = (frac>0)? frac << 32 : -( (-frac) << 32);
+
+       return;
 }
 
 /*
 }
 
 /*
- * Get the service port on a clock.
+ * scale_delta:
+ *
+ * returns a bintime struct representing delta scaled according to the
+ * scale factors provided to this function.
  */
  */
-kern_return_t
-host_get_clock_service(
-       host_t                  host,
-       clock_id_t              clock_id,
-       clock_t                 *clock)         /* OUT */
-{
-       if (host == HOST_NULL || clock_id < 0 || clock_id >= clock_count) {
-               *clock = CLOCK_NULL;
-               return (KERN_INVALID_ARGUMENT);
+static struct bintime
+scale_delta(uint64_t delta, uint64_t tick_scale_x, uint64_t s_scale_ns, int64_t s_adj_nsx)
+{
+       uint64_t sec, new_ns, over;
+       struct bintime bt;
+
+       bt.sec = 0;
+       bt.frac = 0;
+
+       /*
+        * If more than one second is elapsed,
+        * scale fully elapsed seconds using scale factors for seconds.
+        * s_scale_ns -> scales sec to ns.
+        * s_adj_nsx -> additional adj expressed in 64 bit frac of ns to apply to each sec.
+        */
+       if (delta > ticks_per_sec) {
+               sec = (delta/ticks_per_sec);
+               new_ns = sec * s_scale_ns;
+               bintime_addns(&bt, new_ns);
+               if (s_adj_nsx) {
+                       if (sec == 1) {
+                               /* shortcut, no overflow can occur */
+                               if (s_adj_nsx > 0)
+                                       bintime_addx(&bt, (uint64_t)s_adj_nsx/ (uint64_t)NSEC_PER_SEC);
+                               else
+                                       bintime_subx(&bt, (uint64_t)-s_adj_nsx/ (uint64_t)NSEC_PER_SEC);
+                       }
+                       else{
+                               /*
+                                * s_adj_nsx is 64 bit frac of ns.
+                                * sec*s_adj_nsx might overflow in int64_t.
+                                * use bintime_addxns to not lose overflowed ns.
+                                */
+                               bintime_addxns(&bt, sec, s_adj_nsx);
+                       }
+               }
+               delta = (delta % ticks_per_sec);
+        }
+
+       over = multi_overflow(tick_scale_x, delta);
+       if(over){
+               bt.sec += over;
        }
 
        }
 
-       *clock = &clock_list[clock_id];
-       if ((*clock)->cl_ops == 0)
-               return (KERN_FAILURE);
-       return (KERN_SUCCESS);
+       /*
+        * scale elapsed ticks using the scale factor for ticks.
+        */
+       bintime_addx(&bt, delta * tick_scale_x);
+
+       return bt;
 }
 
 /*
 }
 
 /*
- * Get the control port on a clock.
+ * get_scaled_time:
+ *
+ * returns the scaled time of the time elapsed from the last time
+ * scale factors were updated to now.
  */
  */
-kern_return_t
-host_get_clock_control(
-       host_priv_t             host_priv,
-       clock_id_t              clock_id,
-       clock_t                 *clock)         /* OUT */
-{
-       if (host_priv == HOST_PRIV_NULL || clock_id < 0 || clock_id >= clock_count) {
-               *clock = CLOCK_NULL;
-               return (KERN_INVALID_ARGUMENT);
-       }
+static struct bintime
+get_scaled_time(uint64_t now)
+{
+       uint64_t delta;
 
 
-       *clock = &clock_list[clock_id];
-       if ((*clock)->cl_ops == 0)
-               return (KERN_FAILURE);
-       return (KERN_SUCCESS);
+       /*
+        * Compute ticks elapsed since last scale update.
+        * This time will be scaled according to the value given by ntp kern.
+        */
+       delta = now - clock_calend.offset_count;
+
+       return scale_delta(delta, clock_calend.tick_scale_x, clock_calend.s_scale_ns, clock_calend.s_adj_nsx);
+}
+
+static void    /* Computes the current calendar time as (secs, microsecs) and optionally
+                * reports the absolute time it was sampled at. The caller visible in this
+                * file takes clock_lock at splclock before calling. */
+clock_get_calendar_absolute_and_microtime_locked(
+       clock_sec_t             *secs,
+       clock_usec_t            *microsecs,
+       uint64_t                *abstime)
+{
+       uint64_t now;
+       struct bintime bt;
+
+       now  = mach_absolute_time();
+       if (abstime)            /* abstime may be NULL */
+               *abstime = now;
+
+       bt = get_scaled_time(now);      /* scaled time elapsed since clock_calend.offset_count */
+       bintime_add(&bt, &clock_calend.bintime);        /* add the cumulative calendar offset */
+       bintime2usclock(&bt, secs, microsecs);  /* split into seconds and microseconds */
+}
+
+static void    /* Computes the current calendar time as (secs, nanosecs) and optionally
+                * reports the absolute time it was sampled at. The caller visible in this
+                * file takes clock_lock at splclock before calling. */
+clock_get_calendar_absolute_and_nanotime_locked(
+       clock_sec_t             *secs,
+       clock_usec_t            *nanosecs,
+       uint64_t                *abstime)
+{
+       uint64_t now;
+       struct bintime bt;
+
+       now  = mach_absolute_time();
+       if (abstime)            /* abstime may be NULL */
+               *abstime = now;
+
+       bt = get_scaled_time(now);      /* scaled time elapsed since clock_calend.offset_count */
+       bintime_add(&bt, &clock_calend.bintime);        /* add the cumulative calendar offset */
+       bintime2nsclock(&bt, secs, nanosecs);   /* split into seconds and nanoseconds */
 }
 
 /*
 }
 
 /*
- * Get the current clock time.
+ *     clock_get_calendar_absolute_and_microtime:
+ *
+ *     Returns the current calendar value,
+ *     microseconds as the fraction. Also
+ *     returns mach_absolute_time if abstime
+ *     is not NULL.
  */
  */
-kern_return_t
-clock_get_time(
-       clock_t                 clock,
-       mach_timespec_t *cur_time)      /* OUT */
+void
+clock_get_calendar_absolute_and_microtime(
+       clock_sec_t             *secs,
+       clock_usec_t            *microsecs,
+       uint64_t                *abstime)
 {
 {
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       return ((*clock->cl_ops->c_gettime)(cur_time));
+       spl_t                   s;
+
+       s = splclock();
+       clock_lock();
+
+       clock_get_calendar_absolute_and_microtime_locked(secs, microsecs, abstime);
+
+       clock_unlock();
+       splx(s);
 }
 
 /*
 }
 
 /*
- * Get clock attributes.
+ *     clock_get_calendar_nanotime:
+ *
+ *     Returns the current calendar value,
+ *     nanoseconds as the fraction.
+ *
+ *     Since we do not have an interface to
+ *     set the calendar with resolution greater
+ *     than a microsecond, we honor that here.
  */
  */
-kern_return_t
-clock_get_attributes(
-       clock_t                                 clock,
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
-{
-       kern_return_t   (*getattr)(
-                                               clock_flavor_t                  flavor,
-                                               clock_attr_t                    attr,
-                                               mach_msg_type_number_t  *count);
-
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (getattr = clock->cl_ops->c_getattr)
-               return((*getattr)(flavor, attr, count));
-       else
-               return (KERN_FAILURE);
+void
+clock_get_calendar_nanotime(
+       clock_sec_t             *secs,
+       clock_nsec_t            *nanosecs)
+{
+       spl_t                   s;
+
+       s = splclock();         /* saved here and restored by splx() below */
+       clock_lock();
+
+       /* NOTE(review): the value comes from bintime2nsclock() via the _locked helper and is not rounded to microseconds here */
+       clock_get_calendar_absolute_and_nanotime_locked(secs, nanosecs, NULL);  /* NULL: absolute time not wanted */
+
+       clock_unlock();
+       splx(s);
 }
 
 /*
 }
 
 /*
- * Set the current clock time.
+ *     clock_gettimeofday:
+ *
+ *     Kernel interface for commpage implementation of
+ *     gettimeofday() syscall.
+ *
+ *     Returns the current calendar value, and updates the
+ *     commpage info as appropriate.  Because most calls to
+ *     gettimeofday() are handled in user mode by the commpage,
+ *     this routine should be used infrequently.
  */
  */
-kern_return_t
-clock_set_time(
-       clock_t                 clock,
-       mach_timespec_t new_time)
-{
-       mach_timespec_t *clock_time;
-       kern_return_t   (*settime)(
-                                               mach_timespec_t         *clock_time);
-
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if ((settime = clock->cl_ops->c_settime) == 0)
-               return (KERN_FAILURE);
-       clock_time = &new_time;
-       if (BAD_MACH_TIMESPEC(clock_time))
-               return (KERN_INVALID_VALUE);
+void
+clock_gettimeofday(
+       clock_sec_t     *secs,
+       clock_usec_t    *microsecs)
+{
+       /* same work as clock_gettimeofday_and_absolute_time(), without returning the mach time */
+       clock_gettimeofday_and_absolute_time(secs, microsecs, NULL);
+}
 
 
-       /*
-        * Flush all outstanding alarms.
-        */
-       flush_alarms(clock);
+void   /* Returns the current calendar time as (secs, microsecs), hands the values
+        * it was computed from to clock_gettimeofday_set_commpage(), and optionally
+        * reports the absolute time that was sampled. */
+clock_gettimeofday_and_absolute_time(
+       clock_sec_t     *secs,
+       clock_usec_t    *microsecs,
+       uint64_t        *mach_time)
+{
+       uint64_t                now;
+       spl_t                   s;
+       struct bintime  bt;
+
+       s = splclock();         /* saved here and restored by splx() below */
+       clock_lock();
+
+       now = mach_absolute_time();
+       bt = get_scaled_time(now);      /* scaled time elapsed since clock_calend.offset_count */
+       bintime_add(&bt, &clock_calend.bintime);        /* add the cumulative calendar offset */
+       bintime2usclock(&bt, secs, microsecs);


+       clock_gettimeofday_set_commpage(now, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec);        /* called while clock_lock is still held */
+
+       clock_unlock();
+       splx(s);
+
+       if (mach_time) {        /* mach_time may be NULL; written after the lock is dropped */
+               *mach_time = now;
+       }
+}
+
+static void
+update_basesleep(struct bintime delta, bool forward)
+{
        /*
        /*
-        * Set the new time.
+        * Update basesleep only if the platform does not have a monotonic clock.
+        * In that case the sleep time computation will use the PMU time,
+        * whose offset gets modified by settimeofday.
+        * We don't need this for a monotonic clock because in that case the sleep
+        * time computation is independent of the offset value of the PMU.
         */
         */
-       return ((*settime)(clock_time));
+       if (!has_monotonic_clock) {
+               if (forward)
+                       bintime_add(&clock_calend.basesleep, &delta);
+               else
+                       bintime_sub(&clock_calend.basesleep, &delta);
+       }
 }
 
 /*
 }
 
 /*
- * Set the clock alarm resolution.
+ *     clock_set_calendar_microtime:
+ *
+ *     Sets the current calendar value by
+ *     recalculating the epoch and offset
+ *     from the system clock.
+ *
+ *     Also adjusts the boottime to keep the
+ *     value consistent, writes the new
+ *     calendar value to the platform clock,
+ *     and sends calendar change notifications.
  */
  */
-kern_return_t
-clock_set_attributes(
-       clock_t                                 clock,
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  count)
-{
-       kern_return_t   (*setattr)(
-                                               clock_flavor_t                  flavor,
-                                               clock_attr_t                    attr,
-                                               mach_msg_type_number_t  count);
-
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (setattr = clock->cl_ops->c_setattr)
-               return ((*setattr)(flavor, attr, count));
-       else
-               return (KERN_FAILURE);
-}
+void
+clock_set_calendar_microtime(
+       clock_sec_t             secs,
+       clock_usec_t            microsecs)
+{
+       uint64_t                absolutesys;
+       clock_sec_t             newsecs;
+       clock_sec_t             oldsecs;
+       clock_usec_t            newmicrosecs;
+       clock_usec_t            oldmicrosecs;
+       uint64_t                commpage_value;
+       spl_t                   s;
+       struct bintime          bt;
+       clock_sec_t             deltasecs;
+       clock_usec_t            deltamicrosecs;
+
+       newsecs = secs;
+       newmicrosecs = microsecs;
 
 
-/*
- * Setup a clock alarm.
- */
-kern_return_t
-clock_alarm(
-       clock_t                                 clock,
-       alarm_type_t                    alarm_type,
-       mach_timespec_t                 alarm_time,
-       ipc_port_t                              alarm_port,
-       mach_msg_type_name_t    alarm_port_type)
-{
-       alarm_t                                 alarm;
-       mach_timespec_t                 clock_time;
-       int                                             chkstat;
-       kern_return_t                   reply_code;
-       spl_t                                   s;
-
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_setalrm == 0)
-               return (KERN_FAILURE);
-       if (IP_VALID(alarm_port) == 0)
-               return (KERN_INVALID_CAPABILITY);
+       /*
+        * The settime_lock mutex prevents racing settimeofday calls from updating the wall clock
+        * and the platform clock concurrently.
+        *
+        * clock_lock cannot be used for this because it is acquired from interrupt context
+        * and requires interrupts to be disabled, whereas the platform clock must be
+        * updated with interrupts enabled.
+        */
+       lck_mtx_lock(&settime_lock);
+
+       s = splclock();
+       clock_lock();
+
+#if DEVELOPMENT || DEBUG
+       struct clock_calend clock_calend_cp = clock_calend;
+#endif
+       commpage_disable_timestamp();
 
        /*
 
        /*
-        * Check alarm parameters. If parameters are invalid,
-        * send alarm message immediately.
+        *      Adjust the boottime based on the delta.
         */
         */
-       (*clock->cl_ops->c_gettime)(&clock_time);
-       chkstat = check_time(alarm_type, &alarm_time, &clock_time);
-       if (chkstat <= 0) {
-               reply_code = (chkstat < 0 ? KERN_INVALID_VALUE : KERN_SUCCESS);
-               clock_alarm_reply(alarm_port, alarm_port_type,
-                                 reply_code, alarm_type, clock_time);
-               return (KERN_SUCCESS);
+       clock_get_calendar_absolute_and_microtime_locked(&oldsecs, &oldmicrosecs, &absolutesys);
+
+#if DEVELOPMENT || DEBUG
+       if (g_should_log_clock_adjustments) {
+               os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n",
+                      __func__, (unsigned long)oldsecs, oldmicrosecs, absolutesys);
+               os_log(OS_LOG_DEFAULT, "%s requested %lu s %d u\n",
+                      __func__,  (unsigned long)secs, microsecs );
        }
        }
+#endif
+
+       if (oldsecs < secs || (oldsecs == secs && oldmicrosecs < microsecs)) {
+               // moving forwards
+               deltasecs = secs;
+               deltamicrosecs = microsecs;
+
+               TIME_SUB(deltasecs, oldsecs, deltamicrosecs, oldmicrosecs, USEC_PER_SEC);
+
+#if DEVELOPMENT || DEBUG
+               if (g_should_log_clock_adjustments) {
+                       os_log(OS_LOG_DEFAULT, "%s delta requested %lu s %d u\n",
+                              __func__, (unsigned long)deltasecs, deltamicrosecs);
+               }
+#endif
+
+               TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+               clock2bintime(&deltasecs, &deltamicrosecs, &bt);
+               bintime_add(&clock_calend.boottime, &bt);
+               update_basesleep(bt, TRUE);
+       } else {
+               // moving backwards
+               deltasecs = oldsecs;
+               deltamicrosecs = oldmicrosecs;
+
+               TIME_SUB(deltasecs, secs, deltamicrosecs, microsecs, USEC_PER_SEC);
+#if DEVELOPMENT || DEBUG
+               if (g_should_log_clock_adjustments) {
+                       os_log(OS_LOG_DEFAULT, "%s negative delta requested %lu s %d u\n",
+                              __func__, (unsigned long)deltasecs, deltamicrosecs);
+               }
+#endif
+
+               TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+               clock2bintime(&deltasecs, &deltamicrosecs, &bt);
+               bintime_sub(&clock_calend.boottime, &bt);
+               update_basesleep(bt, FALSE);
+       }
+
+       clock_calend.bintime = clock_calend.boottime;
+       bintime_add(&clock_calend.bintime, &clock_calend.offset);
+
+       clock2bintime((clock_sec_t *) &secs, (clock_usec_t *) &microsecs, &bt);
+
+       clock_gettimeofday_set_commpage(absolutesys, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec);
+
+#if DEVELOPMENT || DEBUG
+       struct clock_calend clock_calend_cp1 = clock_calend;
+#endif
+
+       commpage_value = clock_boottime * USEC_PER_SEC + clock_boottime_usec;
+
+       clock_unlock();
+       splx(s);
 
        /*
 
        /*
-        * Get alarm and add to clock alarm list.
+        *      Set the new value for the platform clock.
+        *      This call might block, so interrupts must be enabled.
         */
         */
+#if DEVELOPMENT || DEBUG
+       uint64_t now_b = mach_absolute_time();
+#endif
 
 
-       LOCK_CLOCK(s);
-       if ((alarm = alrmfree) == 0) {
-               UNLOCK_CLOCK(s);
-               alarm = (alarm_t) zalloc(alarm_zone);
-               if (alarm == 0)
-                       return (KERN_RESOURCE_SHORTAGE);
-               LOCK_CLOCK(s);
+       PESetUTCTimeOfDay(newsecs, newmicrosecs);
+
+#if DEVELOPMENT || DEBUG
+       uint64_t now_a = mach_absolute_time();
+       if (g_should_log_clock_adjustments) {
+               os_log(OS_LOG_DEFAULT, "%s mach bef PESet %llu mach aft %llu \n", __func__, now_b, now_a);
        }
        }
-       else
-               alrmfree = alarm->al_next;
-
-       alarm->al_status = ALARM_CLOCK;
-       alarm->al_time = alarm_time;
-       alarm->al_type = alarm_type;
-       alarm->al_port = alarm_port;
-       alarm->al_port_type = alarm_port_type;
-       alarm->al_clock = clock;
-       alarm->al_seqno = alrm_seqno++;
-       post_alarm(clock, alarm);
-       UNLOCK_CLOCK(s);
+#endif
 
 
-       return (KERN_SUCCESS);
+       print_all_clock_variables_internal(__func__, &clock_calend_cp);
+       print_all_clock_variables_internal(__func__, &clock_calend_cp1);
+
+       commpage_update_boottime(commpage_value);
+
+       /*
+        *      Send host notifications.
+        */
+       host_notify_calendar_change();
+       host_notify_calendar_set();
+
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
+
+       lck_mtx_unlock(&settime_lock);
 }
 
 }
 
+uint64_t mach_absolutetime_asleep = 0;
+uint64_t mach_absolutetime_last_sleep = 0;
+
+void
+clock_get_calendar_uptime(clock_sec_t *secs)
+{
+       uint64_t now;
+       spl_t s;
+       struct bintime bt;
+
+       s = splclock();
+       clock_lock();
+
+       now = mach_absolute_time();
+
+       bt = get_scaled_time(now);
+       bintime_add(&bt, &clock_calend.offset);
+
+       *secs = bt.sec;
+
+       clock_unlock();
+       splx(s);
+}
+
+
 /*
 /*
- * Sleep on a clock. System trap. User-level libmach clock_sleep
- * interface call takes a mach_timespec_t sleep_time argument which it
- * converts to sleep_sec and sleep_nsec arguments which are then
- * passed to clock_sleep_trap.
+ * clock_update_calendar:
+ *
+ * called by ntp timer to update scale factors.
  */
  */
-kern_return_t
-clock_sleep_trap(
-       mach_port_name_t        clock_name,
-       sleep_type_t            sleep_type,
-       int                                     sleep_sec,
-       int                                     sleep_nsec,
-       mach_timespec_t         *wakeup_time)
+void
+clock_update_calendar(void)
 {
 {
-       clock_t                         clock;
-       mach_timespec_t         swtime;
-       kern_return_t           rvalue;
 
 
-       /*
-        * Convert the trap parameters.
-        */
-       if (clock_name != MACH_PORT_NULL)
-               clock = port_name_to_clock(clock_name);
-       else
-               clock = &clock_list[SYSTEM_CLOCK];
+       uint64_t now, delta;
+       struct bintime bt;
+       spl_t s;
+       int64_t adjustment;
+
+       s = splclock();
+       clock_lock();
 
 
-       swtime.tv_sec  = sleep_sec;
-       swtime.tv_nsec = sleep_nsec;
+       now  = mach_absolute_time();
 
        /*
 
        /*
-        * Call the actual clock_sleep routine.
+        * scale the time elapsed since the last update and
+        * add it to offset.
         */
         */
-       rvalue = clock_sleep_internal(clock, sleep_type, &swtime);
+       bt = get_scaled_time(now);
+       bintime_add(&clock_calend.offset, &bt);
 
        /*
 
        /*
-        * Return current time as wakeup time.
+        * update the base from which the next scale factors are applied.
         */
         */
-       if (rvalue != KERN_INVALID_ARGUMENT && rvalue != KERN_FAILURE) {
-               copyout((char *)&swtime, (char *)wakeup_time,
-                       sizeof(mach_timespec_t));
-       }
-       return (rvalue);
-}      
+       delta = now - clock_calend.offset_count;
+       clock_calend.offset_count += delta;
 
 
-/*
- * Kernel internally callable clock sleep routine. The calling
- * thread is suspended until the requested sleep time is reached.
- */
-kern_return_t
-clock_sleep_internal(
-       clock_t                         clock,
-       sleep_type_t            sleep_type,
-       mach_timespec_t         *sleep_time)
-{
-       alarm_t                         alarm;
-       mach_timespec_t         clock_time;
-       kern_return_t           rvalue;
-       int                                     chkstat;
-       spl_t                           s;
-
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_setalrm == 0)
-               return (KERN_FAILURE);
+       clock_calend.bintime = clock_calend.offset;
+       bintime_add(&clock_calend.bintime, &clock_calend.boottime);
 
        /*
 
        /*
-        * Check sleep parameters. If parameters are invalid
-        * return an error, otherwise post alarm request.
+        * recompute next adjustment.
         */
         */
-       (*clock->cl_ops->c_gettime)(&clock_time);
+       ntp_update_second(&adjustment, clock_calend.bintime.sec);
 
 
-       chkstat = check_time(sleep_type, sleep_time, &clock_time);
-       if (chkstat < 0)
-               return (KERN_INVALID_VALUE);
-       rvalue = KERN_SUCCESS;
-       if (chkstat > 0) {
-               wait_result_t wait_result;
+#if DEVELOPMENT || DEBUG
+       if (g_should_log_clock_adjustments) {
+               os_log(OS_LOG_DEFAULT, "%s adjustment %lld\n", __func__, adjustment);
+       }
+#endif
+       
+       /*
+        * recomputing scale factors.
+        */
+       get_scale_factors_from_adj(adjustment, &clock_calend.tick_scale_x, &clock_calend.s_scale_ns, &clock_calend.s_adj_nsx);
 
 
-               /*
-                * Get alarm and add to clock alarm list.
-                */
+       clock_gettimeofday_set_commpage(now, clock_calend.bintime.sec, clock_calend.bintime.frac, clock_calend.tick_scale_x, ticks_per_sec);
 
 
-               LOCK_CLOCK(s);
-               if ((alarm = alrmfree) == 0) {
-                       UNLOCK_CLOCK(s);
-                       alarm = (alarm_t) zalloc(alarm_zone);
-                       if (alarm == 0)
-                               return (KERN_RESOURCE_SHORTAGE);
-                       LOCK_CLOCK(s);
-               }
-               else
-                       alrmfree = alarm->al_next;
+#if DEVELOPMENT || DEBUG
+       struct clock_calend calend_cp = clock_calend;
+#endif
 
 
-               /*
-                * Wait for alarm to occur.
-                */
-               wait_result = assert_wait((event_t)alarm, THREAD_ABORTSAFE);
-               if (wait_result == THREAD_WAITING) {
-                       alarm->al_time = *sleep_time;
-                       alarm->al_status = ALARM_SLEEP;
-                       post_alarm(clock, alarm);
-                       UNLOCK_CLOCK(s);
-
-                       wait_result = thread_block(THREAD_CONTINUE_NULL);
-
-                       /*
-                        * Note if alarm expired normally or whether it
-                        * was aborted. If aborted, delete alarm from
-                        * clock alarm list. Return alarm to free list.
-                        */
-                       LOCK_CLOCK(s);
-                       if (alarm->al_status != ALARM_DONE) {
-                               assert(wait_result != THREAD_AWAKENED);
-                               if ((alarm->al_prev)->al_next = alarm->al_next)
-                                       (alarm->al_next)->al_prev = alarm->al_prev;
-                               rvalue = KERN_ABORTED;
-                       }
-                       *sleep_time = alarm->al_time;
-                       alarm->al_status = ALARM_FREE;
-               } else {
-                       assert(wait_result == THREAD_INTERRUPTED);
-                       assert(alarm->al_status == ALARM_FREE);
-                       rvalue = KERN_ABORTED;
-               }
-               alarm->al_next = alrmfree;
-               alrmfree = alarm;
-               UNLOCK_CLOCK(s);
+       clock_unlock();
+       splx(s);
+
+       print_all_clock_variables(__func__, NULL,NULL,NULL,NULL, &calend_cp);
+}
+
+
+#if DEVELOPMENT || DEBUG
+
+void print_all_clock_variables_internal(const char* func, struct clock_calend* clock_calend_cp)
+{
+       clock_sec_t     offset_secs;
+       clock_usec_t    offset_microsecs;
+       clock_sec_t     bintime_secs;
+       clock_usec_t    bintime_microsecs;
+       clock_sec_t     bootime_secs;
+       clock_usec_t    bootime_microsecs;
+       
+       if (!g_should_log_clock_adjustments)
+                return;
+
+       bintime2usclock(&clock_calend_cp->offset, &offset_secs, &offset_microsecs);
+       bintime2usclock(&clock_calend_cp->bintime, &bintime_secs, &bintime_microsecs);
+       bintime2usclock(&clock_calend_cp->boottime, &bootime_secs, &bootime_microsecs);
+
+       os_log(OS_LOG_DEFAULT, "%s s_scale_ns %llu s_adj_nsx %lld tick_scale_x %llu offset_count %llu\n",
+              func , clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx,
+              clock_calend_cp->tick_scale_x, clock_calend_cp->offset_count);
+       os_log(OS_LOG_DEFAULT, "%s offset.sec %ld offset.frac %llu offset_secs %lu offset_microsecs %d\n",
+              func, clock_calend_cp->offset.sec, clock_calend_cp->offset.frac,
+              (unsigned long)offset_secs, offset_microsecs);
+       os_log(OS_LOG_DEFAULT, "%s bintime.sec %ld bintime.frac %llu bintime_secs %lu bintime_microsecs %d\n",
+              func, clock_calend_cp->bintime.sec, clock_calend_cp->bintime.frac,
+              (unsigned long)bintime_secs, bintime_microsecs);
+       os_log(OS_LOG_DEFAULT, "%s bootime.sec %ld bootime.frac %llu bootime_secs %lu bootime_microsecs %d\n",
+              func, clock_calend_cp->boottime.sec, clock_calend_cp->boottime.frac,
+              (unsigned long)bootime_secs, bootime_microsecs);
+
+       clock_sec_t     basesleep_secs;
+        clock_usec_t    basesleep_microsecs;
+       
+       bintime2usclock(&clock_calend_cp->basesleep, &basesleep_secs, &basesleep_microsecs);
+       os_log(OS_LOG_DEFAULT, "%s basesleep.sec %ld basesleep.frac %llu basesleep_secs %lu basesleep_microsecs %d\n",
+              func, clock_calend_cp->basesleep.sec, clock_calend_cp->basesleep.frac,
+              (unsigned long)basesleep_secs, basesleep_microsecs);
+
+}
+
+
+void print_all_clock_variables(const char* func, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* clock_calend_cp)
+{
+       if (!g_should_log_clock_adjustments)
+               return;
+
+       struct bintime  bt;
+       clock_sec_t     wall_secs;
+       clock_usec_t    wall_microsecs;
+       uint64_t now;
+       uint64_t delta;
+
+       if (pmu_secs) {
+               os_log(OS_LOG_DEFAULT, "%s PMU %lu s %d u \n", func, (unsigned long)*pmu_secs, *pmu_usec); 
        }
        }
-       else
-               *sleep_time = clock_time;
+       if (sys_secs) {
+               os_log(OS_LOG_DEFAULT, "%s sys %lu s %d u \n", func, (unsigned long)*sys_secs, *sys_usec);
+       }
+
+       print_all_clock_variables_internal(func, clock_calend_cp);
+
+       now = mach_absolute_time();
+        delta = now - clock_calend_cp->offset_count;
+
+        bt = scale_delta(delta, clock_calend_cp->tick_scale_x, clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx);
+       bintime_add(&bt, &clock_calend_cp->bintime);
+       bintime2usclock(&bt, &wall_secs, &wall_microsecs);
 
 
-       return (rvalue);
+       os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n",
+              func, (unsigned long)wall_secs, wall_microsecs, now);
 }
 
 }
 
-/*
- * CLOCK INTERRUPT SERVICE ROUTINES.
- */
+
+#endif /* DEVELOPMENT || DEBUG */
+
 
 /*
 
 /*
- * Service clock alarm interrupts. Called from machine dependent
- * layer at splclock(). The clock_id argument specifies the clock,
- * and the clock_time argument gives that clock's current time.
+ *     clock_initialize_calendar:
+ *
+ *     Set the calendar and related clocks
+ *     from the platform clock at boot.
+ *
+ *     Also sends host notifications.
  */
 void
  */
 void
-clock_alarm_intr(
-       clock_id_t                      clock_id,
-       mach_timespec_t         *clock_time)
+clock_initialize_calendar(void)
 {
 {
-       clock_t                         clock;
-       register alarm_t        alrm1;
-       register alarm_t        alrm2;
-       mach_timespec_t         *alarm_time;
-       spl_t                           s;
+       clock_sec_t             sys;  // sleepless time since boot in seconds
+       clock_sec_t             secs; // Current UTC time
+       clock_sec_t             utc_offset_secs; // Difference in current UTC time and sleepless time since boot
+       clock_usec_t            microsys;  
+       clock_usec_t            microsecs; 
+       clock_usec_t            utc_offset_microsecs; 
+       spl_t                   s;
+       struct bintime          bt;
+       struct bintime          monotonic_bt;
+       struct latched_time     monotonic_time;
+       uint64_t                monotonic_usec_total;
+       clock_sec_t             sys2, monotonic_sec;
+        clock_usec_t            microsys2, monotonic_usec;
+        size_t                  size;
+
+       //Get PMU time with offset and corresponding sys time
+       PEGetUTCTimeOfDay(&secs, &microsecs);
+       clock_get_system_microtime(&sys, &microsys);
+
+       /*
+        * If the platform has a monotonic clock, use kern.monotonicclock_usecs
+        * to estimate the sleep/wake time, otherwise use the PMU and adjustments
+        * provided through settimeofday to estimate the sleep time.
+        * NOTE: the latter case relies on the kernel being the only component
+        * that sets the PMU offset.
+        */
+       size = sizeof(monotonic_time);
+       if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) {
+               has_monotonic_clock = 0;
+               os_log(OS_LOG_DEFAULT, "%s system does not have monotonic clock.\n", __func__);
+       } else {
+               has_monotonic_clock = 1;
+               monotonic_usec_total = monotonic_time.monotonic_time_usec;
+               absolutetime_to_microtime(monotonic_time.mach_time, &sys2, &microsys2);
+               os_log(OS_LOG_DEFAULT, "%s system has monotonic clock.\n", __func__);
+       }
+
+       s = splclock();
+       clock_lock();
+
+       commpage_disable_timestamp();
 
 
-       clock = &clock_list[clock_id];
+       utc_offset_secs = secs;
+       utc_offset_microsecs = microsecs;
+
+#if DEVELOPMENT || DEBUG
+       last_utc_sec = secs;
+       last_utc_usec = microsecs;
+       last_sys_sec = sys;
+       last_sys_usec = microsys;
+       if (secs > max_utc_sec)
+               max_utc_sec = secs;
+#endif
 
        /*
 
        /*
-        * Update clock alarm list. All alarms that are due are moved
-        * to the alarmdone list to be serviced by the alarm_thread.
+        * We normally expect the UTC clock to be always-on and produce
+        * greater readings than the tick counter.  There may be corner cases
+        * due to differing clock resolutions (UTC clock is likely lower) and
+        * errors reading the UTC clock (some implementations return 0
+        * on error) in which that doesn't hold true.  Bring the UTC measurements
+        * in-line with the tick counter measurements as a best effort in that case.
         */
         */
+       //FIXME if the current time is prior to 1970, secs will be negative
+       if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) {
+               os_log(OS_LOG_DEFAULT, "%s WARNING: PMU offset is less then sys PMU %lu s %d u sys %lu s %d u\n",
+                       __func__, (unsigned long) secs, microsecs, (unsigned long)sys, microsys);
+               secs = utc_offset_secs = sys;
+               microsecs = utc_offset_microsecs = microsys;
+       }
 
 
-       LOCK_CLOCK(s);
-       alrm1 = (alarm_t) &clock->cl_alarm;
-       while (alrm2 = alrm1->al_next) {
-               alarm_time = &alrm2->al_time;
-               if (CMP_MACH_TIMESPEC(alarm_time, clock_time) > 0)
-                       break;
+       // PMU time with offset - sys
+       // This macro stores the subtraction result in utc_offset_secs and utc_offset_microsecs
+       TIME_SUB(utc_offset_secs, sys, utc_offset_microsecs, microsys, USEC_PER_SEC);
 
 
-               /*
-                * Alarm has expired, so remove it from the
-                * clock alarm list.
-                */  
-               if (alrm1->al_next = alrm2->al_next)
-                       (alrm1->al_next)->al_prev = alrm1;
+       clock2bintime(&utc_offset_secs, &utc_offset_microsecs, &bt);
 
 
-               /*
-                * If a clock_sleep() alarm, wakeup the thread
-                * which issued the clock_sleep() call.
-                */
-               if (alrm2->al_status == ALARM_SLEEP) {
-                       alrm2->al_next = 0;
-                       alrm2->al_status = ALARM_DONE;
-                       alrm2->al_time = *clock_time;
-                       thread_wakeup((event_t)alrm2);
-               }
+       /*
+        *      Initialize the boot time based on the platform clock.
+        */
+       clock_boottime = secs;
+       clock_boottime_usec = microsecs;
+       commpage_update_boottime(clock_boottime * USEC_PER_SEC + clock_boottime_usec);
+
+       nanoseconds_to_absolutetime((uint64_t)NSEC_PER_SEC, &ticks_per_sec);
+       clock_calend.boottime = bt;
+       clock_calend.bintime = bt;
+       clock_calend.offset.sec = 0;
+       clock_calend.offset.frac = 0;
+
+       clock_calend.tick_scale_x = (uint64_t)1 << 63;
+       clock_calend.tick_scale_x /= ticks_per_sec;
+       clock_calend.tick_scale_x *= 2;
+
+       clock_calend.s_scale_ns = NSEC_PER_SEC;
+       clock_calend.s_adj_nsx = 0;
+
+       if (has_monotonic_clock) {
+
+               monotonic_sec = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
+               monotonic_usec = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+
+               // PMU time without offset - sys
+               // This macro stores the subtraction result in monotonic_sec and monotonic_usec
+               TIME_SUB(monotonic_sec, sys2, monotonic_usec, microsys2, USEC_PER_SEC);
+               clock2bintime(&monotonic_sec, &monotonic_usec, &monotonic_bt);
+
+               // set basesleep to the difference (monotonic clock - sys)
+               clock_calend.basesleep = monotonic_bt;
+       } else {
+               // set basesleep to the difference (PMU clock - sys)
+               clock_calend.basesleep = bt;
+       }
+       commpage_update_mach_continuous_time(mach_absolutetime_asleep);
 
 
-               /*
-                * If a clock_alarm() alarm, place the alarm on
-                * the alarm done list and schedule the alarm
-                * delivery mechanism.
-                */
-               else {
-                       assert(alrm2->al_status == ALARM_CLOCK);
-                       if (alrm2->al_next = alrmdone)
-                               alrmdone->al_prev = alrm2;
-                       else
-                               thread_call_enter(&alarm_deliver);
-                       alrm2->al_prev = (alarm_t) &alrmdone;
-                       alrmdone = alrm2;
-                       alrm2->al_status = ALARM_DONE;
-                       alrm2->al_time = *clock_time;
+#if DEVELOPMENT || DEBUG
+       struct clock_calend clock_calend_cp = clock_calend;
+#endif
+
+       clock_unlock();
+       splx(s);
+
+        print_all_clock_variables(__func__, &secs, &microsecs, &sys, &microsys, &clock_calend_cp);
+
+       /*
+        *      Send host notifications.
+        */
+       host_notify_calendar_change();
+       
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
+}
+
+
+void
+clock_wakeup_calendar(void)
+{
+       clock_sec_t             sys;
+       clock_sec_t             secs;
+       clock_usec_t            microsys;
+       clock_usec_t            microsecs;
+       spl_t                   s;
+       struct bintime          bt, last_sleep_bt;
+       clock_sec_t             basesleep_s, last_sleep_sec;
+       clock_usec_t            basesleep_us, last_sleep_usec;
+       struct latched_time     monotonic_time;
+       uint64_t                monotonic_usec_total;
+       size_t                  size;
+       clock_sec_t secs_copy;
+        clock_usec_t microsecs_copy;
+#if DEVELOPMENT || DEBUG
+       clock_sec_t utc_sec;
+       clock_usec_t utc_usec;
+       PEGetUTCTimeOfDay(&utc_sec, &utc_usec);
+#endif
+
+       /*
+        * If the platform has a monotonic clock, use that to
+        * compute the sleep time. The monotonic clock does not have an offset
+        * that can be modified, so neither the kernel nor userspace can change the
+        * time of this clock; it can only monotonically increase over time.
+        * During sleep mach_absolute_time does not tick,
+        * so the sleep time is the difference between (the current monotonic time
+        * less the absolute time) and the previous such difference stored at wake time.
+        *
+        * basesleep = monotonic - sys ---> computed at last wake
+        * sleep_time = (monotonic - sys) - basesleep
+        *
+        * If the platform does not support monotonic time we use the PMU time
+        * to compute the last sleep.
+        * The PMU time is the monotonic clock + an offset that can be set
+        * by kernel.
+        *
+        * IMPORTANT:
+        * We assume that only the kernel is setting the offset of the PMU and that
+        * it is doing so only through the settimeofday interface.
+        *
+        * basesleep is the difference between the PMU time and the mach_absolute_time
+        * at wake.
+        * During awake time settimeofday can change the PMU offset by a delta,
+        * and basesleep is shifted by the same delta applied to the PMU. So the sleep
+        * time computation becomes:
+        *
+        * PMU = monotonic + PMU_offset
+        * basesleep = PMU - sys ---> computed at last wake
+        * basesleep += settimeofday_delta
+        * PMU_offset += settimeofday_delta
+        * sleep_time = (PMU - sys) - basesleep
+        */
+       if (has_monotonic_clock) {
+               //Get monotonic time with corresponding sys time
+               size = sizeof(monotonic_time);
+               if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) {
+                       panic("%s: could not call kern.monotonicclock_usecs", __func__);
                }
                }
+               monotonic_usec_total = monotonic_time.monotonic_time_usec;
+               absolutetime_to_microtime(monotonic_time.mach_time, &sys, &microsys);
+
+               secs = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
+               microsecs = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+       } else {
+               //Get PMU time with offset and corresponding sys time
+               PEGetUTCTimeOfDay(&secs, &microsecs);
+               clock_get_system_microtime(&sys, &microsys);
+
        }
 
        }
 
+       s = splclock();
+       clock_lock();
+       
+       commpage_disable_timestamp();
+
+       secs_copy = secs;
+       microsecs_copy = microsecs;
+
+#if DEVELOPMENT || DEBUG
+       struct clock_calend clock_calend_cp1 = clock_calend;
+#endif /* DEVELOPMENT || DEBUG */
+
+#if DEVELOPMENT || DEBUG
+       last_utc_sec = secs;
+       last_utc_usec = microsecs;
+       last_sys_sec = sys;
+       last_sys_usec = microsys;
+       if (secs > max_utc_sec)
+               max_utc_sec = secs;
+#endif
        /*
        /*
-        * Setup the clock dependent layer to deliver another
-        * interrupt for the next pending alarm.
+        * We normally expect the UTC clock to be always-on and produce
+        * greater readings than the tick counter.  There may be corner cases
+        * due to differing clock resolutions (UTC clock is likely lower) and
+        * errors reading the UTC clock (some implementations return 0
+        * on error) in which that doesn't hold true.  Bring the UTC measurements
+        * in-line with the tick counter measurements as a best effort in that case.
         */
         */
-       if (alrm2)
-               (*clock->cl_ops->c_setalrm)(alarm_time);
-       UNLOCK_CLOCK(s);
+       //FIXME if the current time is prior to 1970, secs will be negative
+       if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) {
+               os_log(OS_LOG_DEFAULT, "%s WARNING: %s is less then sys %s %lu s %d u sys %lu s %d u\n",
+                       __func__, (has_monotonic_clock)?"monotonic":"PMU", (has_monotonic_clock)?"monotonic":"PMU", (unsigned long)secs, microsecs, (unsigned long)sys, microsys);
+               secs = sys;
+               microsecs = microsys;
+       }
+
+       // PMU or monotonic - sys
+       // This macro stores the subtraction result in secs and microsecs
+       TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC);
+       clock2bintime(&secs, &microsecs, &bt);
+
+       /*
+        * Safety belt: the UTC clock will likely have a lower resolution than the tick counter.
+        * It's also possible that the device didn't fully transition to the powered-off state on
+        * the most recent sleep, so the tick counter may not have reset or may have only briefly
+        * turned off.  In that case it's possible for the difference between the UTC clock and the
+        * tick counter to be less than the previously recorded value in clock.calend.basesleep.
+        * In that case simply record that we slept for 0 ticks.
+        */ 
+       if ((bt.sec > clock_calend.basesleep.sec) ||
+           ((bt.sec == clock_calend.basesleep.sec) && (bt.frac > clock_calend.basesleep.frac))) {
+
+               //last_sleep is the difference between current PMU or monotonic - abs and last wake PMU or monotonic - abs
+               last_sleep_bt = bt;
+               bintime_sub(&last_sleep_bt, &clock_calend.basesleep);
+
+               //set basesleep to current PMU or monotonic - abs
+               clock_calend.basesleep = bt;
+               bintime2usclock(&last_sleep_bt, &last_sleep_sec, &last_sleep_usec);
+               bintime2absolutetime(&last_sleep_bt, &mach_absolutetime_last_sleep);
+               mach_absolutetime_asleep += mach_absolutetime_last_sleep;
+
+               bintime_add(&clock_calend.offset, &last_sleep_bt);
+               bintime_add(&clock_calend.bintime, &last_sleep_bt);
+
+       } else{
+               mach_absolutetime_last_sleep = 0;
+               last_sleep_sec = last_sleep_usec = 0;
+               bintime2usclock(&clock_calend.basesleep, &basesleep_s, &basesleep_us);
+               os_log(OS_LOG_DEFAULT, "%s WARNING: basesleep (%lu s %d u)  > %s-sys (%lu s %d u) \n",
+                       __func__, (unsigned long) basesleep_s, basesleep_us, (has_monotonic_clock)?"monotonic":"PMU", (unsigned long) secs_copy, microsecs_copy );
+       }
+
+       KERNEL_DEBUG_CONSTANT(
+                 MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE,
+                 (uintptr_t) mach_absolutetime_last_sleep,
+                 (uintptr_t) mach_absolutetime_asleep,
+                 (uintptr_t) (mach_absolutetime_last_sleep >> 32),
+                 (uintptr_t) (mach_absolutetime_asleep >> 32),
+                 0);
+
+       commpage_update_mach_continuous_time(mach_absolutetime_asleep);
+       adjust_cont_time_thread_calls();
+
+#if DEVELOPMENT || DEBUG
+       struct clock_calend clock_calend_cp = clock_calend;
+#endif
+
+       clock_unlock();
+       splx(s);
+
+#if DEVELOPMENT || DEBUG
+       if (g_should_log_clock_adjustments) {
+               os_log(OS_LOG_DEFAULT, "PMU was %lu s %d u\n",(unsigned long) utc_sec, utc_usec);
+               os_log(OS_LOG_DEFAULT, "last sleep was %lu s %d u\n",(unsigned long) last_sleep_sec, last_sleep_usec);
+               print_all_clock_variables("clock_wakeup_calendar:BEFORE",
+                                 &secs_copy, &microsecs_copy, &sys, &microsys, &clock_calend_cp1);
+               print_all_clock_variables("clock_wakeup_calendar:AFTER", NULL, NULL, NULL, NULL, &clock_calend_cp);
+       }
+#endif /* DEVELOPMENT || DEBUG */
+
+       host_notify_calendar_change();
+
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
 }
 
 }
 
+
 /*
 /*
- * ALARM DELIVERY ROUTINES.
+ *     clock_get_boottime_nanotime:
+ *
+ *     Return the boottime, used by sysctl.
  */
  */
+void
+clock_get_boottime_nanotime(
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
+{
+       /* Value returned by splclock(); handed back to splx() on the way out. */
+       spl_t   saved_spl = splclock();
 
 
+       clock_lock();
+
+       /* The stored microsecond part is scaled by NSEC_PER_USEC for the caller. */
+       *nanosecs = (clock_nsec_t)clock_boottime_usec * NSEC_PER_USEC;
+       *secs = (clock_sec_t)clock_boottime;
+
+       clock_unlock();
+       splx(saved_spl);
+}
+
+/*
+ *     clock_get_boottime_microtime:
+ *
+ *     Return the boottime, used by sysctl.
+ *
+ *     Stores clock_boottime into *secs and clock_boottime_usec into
+ *     *microsecs.  Both values are read while holding the clock lock
+ *     (splclock() + clock_lock()), so the pair is read together.
+ */
+void
+clock_get_boottime_microtime(
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
+{
+       spl_t   s;
+
+       s = splclock();
+       clock_lock();
+
+       *secs = (clock_sec_t)clock_boottime;
+       /* Cast to the destination's type (clock_usec_t), not clock_nsec_t. */
+       *microsecs = (clock_usec_t)clock_boottime_usec;
+
+       clock_unlock();
+       splx(s);
+}
+
+
+/*
+ *     Wait / delay routines.
+ */
 static void
 static void
-clock_alarm_deliver(
-       thread_call_param_t             p0,
-       thread_call_param_t             p1)
-{
-       register alarm_t        alrm;
-       kern_return_t           code;
-       spl_t                           s;
-
-       LOCK_CLOCK(s);
-       while (alrm = alrmdone) {
-               if (alrmdone = alrm->al_next)
-                       alrmdone->al_prev = (alarm_t) &alrmdone;
-               UNLOCK_CLOCK(s);
-
-               code = (alrm->al_status == ALARM_DONE? KERN_SUCCESS: KERN_ABORTED);
-               if (alrm->al_port != IP_NULL) {
-                       /* Deliver message to designated port */
-                       if (IP_VALID(alrm->al_port)) {
-                               clock_alarm_reply(alrm->al_port, alrm->al_port_type, code,
-                                                                                               alrm->al_type, alrm->al_time);
-                       }
+mach_wait_until_continue(
+       __unused void   *parameter,
+       wait_result_t   wresult)
+{
+       thread_syscall_return((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS);
+       /*NOTREACHED*/
+}
 
 
-                       LOCK_CLOCK(s);
-                       alrm->al_status = ALARM_FREE;
-                       alrm->al_next = alrmfree;
-                       alrmfree = alrm;
-               }
-               else
-                       panic("clock_alarm_deliver");
-       }
+/*
+ * mach_wait_until_trap: Suspend execution of the calling thread until the
+ * specified deadline has passed, or until the wait is interrupted.
+ *
+ * Parameters:    args->deadline          Deadline passed straight through to
+ *                                        assert_wait_deadline_with_leeway()
+ *                                        (presumably mach absolute time
+ *                                        units -- confirm against callers)
+ *
+ * Returns:       KERN_SUCCESS            The wait result was anything other
+ *                                        than THREAD_INTERRUPTED
+ *                KERN_ABORTED            The wait result was THREAD_INTERRUPTED
+ *
+ * mach_wait_until_continue is handed to thread_block() as the continuation;
+ * it applies the same wait-result to kern_return_t mapping.
+ */
+kern_return_t
+mach_wait_until_trap(
+       struct mach_wait_until_trap_args        *args)
+{
+       wait_result_t   outcome;
+
+       /* Zero leeway is requested for this wait. */
+       outcome = assert_wait_deadline_with_leeway((event_t)mach_wait_until_trap,
+                                                  THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL,
+                                                  args->deadline, 0);
+
+       /* Only block when the wait was actually set up. */
+       if (outcome == THREAD_WAITING) {
+               outcome = thread_block(mach_wait_until_continue);
+       }
+
+       if (outcome == THREAD_INTERRUPTED) {
+               return (KERN_ABORTED);
+       }
+
+       return (KERN_SUCCESS);
+}
+
+void
+clock_delay_until(
+       uint64_t                deadline)
+{
+       uint64_t                now = mach_absolute_time();
+
+       if (now >= deadline)
+               return;
 
 
-       UNLOCK_CLOCK(s);
+       _clock_delay_until_deadline(deadline - now, deadline);
 }
 
 /*
 }
 
 /*
- * CLOCK PRIVATE SERVICING SUBROUTINES.
+ * Preserve the original precise interval that the client
+ * requested for comparison to the spin threshold.
  */
  */
+void
+_clock_delay_until_deadline(
+       uint64_t                interval,
+       uint64_t                deadline)
+{
+       /* Forward to the leeway variant, passing a leeway of zero. */
+       const uint64_t  no_leeway = 0;
+
+       _clock_delay_until_deadline_with_leeway(interval, deadline, no_leeway);
+}
 
 /*
 
 /*
- * Flush all pending alarms on a clock. All alarms
- * are activated and timestamped correctly, so any
- * programs waiting on alarms/threads will proceed
- * with accurate information.
+ * Like _clock_delay_until_deadline, but it accepts a
+ * leeway value.
  */
  */
-static
 void
 void
-flush_alarms(
-       clock_t                         clock)
+_clock_delay_until_deadline_with_leeway(
+       uint64_t                interval,
+       uint64_t                deadline,
+       uint64_t                leeway)
 {
 {
-       register alarm_t        alrm1, alrm2;
-       spl_t                           s;
 
 
-       /*
-        * Flush all outstanding alarms.
-        */
-       LOCK_CLOCK(s);
-       alrm1 = (alarm_t) &clock->cl_alarm;
-       while (alrm2 = alrm1->al_next) {
-               /*
-                * Remove alarm from the clock alarm list.
-                */  
-               if (alrm1->al_next = alrm2->al_next)
-                       (alrm1->al_next)->al_prev = alrm1;
+       if (interval == 0)
+               return;
 
 
+       if (    ml_delay_should_spin(interval)  ||
+                       get_preemption_level() != 0                             ||
+                       ml_get_interrupts_enabled() == FALSE    ) {
+               machine_delay_until(interval, deadline);
+       } else {
                /*
                /*
-                * If a clock_sleep() alarm, wakeup the thread
-                * which issued the clock_sleep() call.
+                * For now, assume a leeway request of 0 means the client does not want a leeway
+                * value. We may want to change this interpretation in the future.
                 */
                 */
-               if (alrm2->al_status == ALARM_SLEEP) {
-                       alrm2->al_next = 0;
-                       thread_wakeup((event_t)alrm2);
-               }
-               else {
-                       /*
-                        * If a clock_alarm() alarm, place the alarm on
-                        * the alarm done list and wakeup the dedicated
-                        * kernel alarm_thread to service the alarm.
-                        */
-                       assert(alrm2->al_status == ALARM_CLOCK);
-                       if (alrm2->al_next = alrmdone)
-                               alrmdone->al_prev = alrm2;
-                       else
-                               thread_wakeup((event_t)&alrmdone);
-                       alrm2->al_prev = (alarm_t) &alrmdone;
-                       alrmdone = alrm2;
+
+               if (leeway) {
+                       assert_wait_deadline_with_leeway((event_t)clock_delay_until, THREAD_UNINT, TIMEOUT_URGENCY_LEEWAY, deadline, leeway);
+               } else {
+                       assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
                }
                }
+
+               thread_block(THREAD_CONTINUE_NULL);
        }
        }
-       UNLOCK_CLOCK(s);
 }
 
 }
 
-/*
- * Post an alarm on a clock's active alarm list. The alarm is
- * inserted in time-order into the clock's active alarm list.
- * Always called from within a LOCK_CLOCK() code section.
- */
-static
 void
 void
-post_alarm(
-       clock_t                         clock,
-       alarm_t                         alarm)
+delay_for_interval(
+       uint32_t                interval,
+       uint32_t                scale_factor)
 {
 {
-       register alarm_t        alrm1, alrm2;
-       mach_timespec_t         *alarm_time;
-       mach_timespec_t         *queue_time;
+       uint64_t                abstime;
 
 
-       /*
-        * Traverse alarm list until queue time is greater
-        * than alarm time, then insert alarm.
-        */
-       alarm_time = &alarm->al_time;
-       alrm1 = (alarm_t) &clock->cl_alarm;
-       while (alrm2 = alrm1->al_next) {
-               queue_time = &alrm2->al_time;
-               if (CMP_MACH_TIMESPEC(queue_time, alarm_time) > 0)
-                       break;
-               alrm1 = alrm2;
-       }
-       alrm1->al_next = alarm;
-       alarm->al_next = alrm2;
-       alarm->al_prev = alrm1;
-       if (alrm2)
-               alrm2->al_prev  = alarm;
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
 
-       /*
-        * If the inserted alarm is the 'earliest' alarm,
-        * reset the device layer alarm time accordingly.
-        */
-       if (clock->cl_alarm.al_next == alarm)
-               (*clock->cl_ops->c_setalrm)(alarm_time);
+       _clock_delay_until_deadline(abstime, mach_absolute_time() + abstime);
 }
 
 }
 
-/*
- * Check the validity of 'alarm_time' and 'alarm_type'. If either
- * argument is invalid, return a negative value. If the 'alarm_time'
- * is now, return a 0 value. If the 'alarm_time' is in the future,
- * return a positive value.
- */
-static
-int
-check_time(
-       alarm_type_t            alarm_type,
-       mach_timespec_t         *alarm_time,
-       mach_timespec_t         *clock_time)
+void
+delay_for_interval_with_leeway(
+       uint32_t                interval,
+       uint32_t                leeway,
+       uint32_t                scale_factor)
 {
 {
-       int                                     result;
+       uint64_t                abstime_interval;
+       uint64_t                abstime_leeway;
 
 
-       if (BAD_ALRMTYPE(alarm_type))
-               return (-1);
-       if (BAD_MACH_TIMESPEC(alarm_time))
-               return (-1);
-       if ((alarm_type & ALRMTYPE) == TIME_RELATIVE)
-               ADD_MACH_TIMESPEC(alarm_time, clock_time);
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime_interval);
+       clock_interval_to_absolutetime_interval(leeway, scale_factor, &abstime_leeway);
 
 
-       result = CMP_MACH_TIMESPEC(alarm_time, clock_time);
+       _clock_delay_until_deadline_with_leeway(abstime_interval, mach_absolute_time() + abstime_interval, abstime_leeway);
+}
 
 
-       return ((result >= 0)? result: 0);
+/*
+ * delay: Delay for the magnitude of 'usec' microseconds.
+ *
+ * A negative argument is treated as its absolute value.  The interval is
+ * passed to delay_for_interval() with a scale factor of NSEC_PER_USEC.
+ */
+void
+delay(
+       int             usec)
+{
+       uint32_t        magnitude;
+
+       /*
+        * Negate in unsigned arithmetic: '-usec' on the signed value is
+        * undefined behaviour when usec == INT_MIN.
+        */
+       magnitude = (usec < 0)? (0U - (uint32_t)usec): (uint32_t)usec;
+
+       delay_for_interval(magnitude, NSEC_PER_USEC);
 }
 
 }
 
-mach_timespec_t
-clock_get_system_value(void)
+/*
+ *     Miscellaneous routines.
+ */
+void
+clock_interval_to_deadline(
+       uint32_t                        interval,
+       uint32_t                        scale_factor,
+       uint64_t                        *result)
 {
 {
-       clock_t                         clock = &clock_list[SYSTEM_CLOCK];
-       mach_timespec_t         value;
+       uint64_t        abstime;
 
 
-       (void) (*clock->cl_ops->c_gettime)(&value);
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
 
-       return value;
+       *result = mach_absolute_time() + abstime;
 }
 
 }
 
-mach_timespec_t
-clock_get_calendar_value(void)
+void
+clock_absolutetime_interval_to_deadline(
+       uint64_t                        abstime,
+       uint64_t                        *result)
 {
 {
-       clock_t                         clock = &clock_list[CALENDAR_CLOCK];
-       mach_timespec_t         value = MACH_TIMESPEC_ZERO;
+       *result = mach_absolute_time() + abstime;
+}
 
 
-       (void) (*clock->cl_ops->c_gettime)(&value);
+void
+clock_continuoustime_interval_to_deadline(
+       uint64_t                        conttime,
+       uint64_t                        *result)
+{
+       /* Deadline = current mach_continuous_time() reading plus the interval. */
+       uint64_t        now = mach_continuous_time();
+
+       *result = now + conttime;
+}
 
 
-       return value;
+void
+clock_get_uptime(
+       uint64_t        *result)
+{
+       /* Report the current mach_absolute_time() reading to the caller. */
+       uint64_t        now = mach_absolute_time();
+
+       *result = now;
 }
 
 void
 }
 
 void
@@ -836,120 +1597,144 @@ clock_deadline_for_periodic_event(
        }
 }
 
        }
 }
 
-void
-mk_timebase_info(
-       uint32_t                        *delta,
-       uint32_t                        *abs_to_ns_numer,
-       uint32_t                        *abs_to_ns_denom,
-       uint32_t                        *proc_to_abs_numer,
-       uint32_t                        *proc_to_abs_denom)
+uint64_t
+mach_continuous_time(void)
 {
 {
-       mach_timebase_info_data_t       info;
-       uint32_t                                        one = 1;
-
-       clock_timebase_info(&info);
-
-       copyout((void *)&one, (void *)delta, sizeof (uint32_t));
-
-       copyout((void *)&info.numer, (void *)abs_to_ns_numer, sizeof (uint32_t));
-       copyout((void *)&info.denom, (void *)abs_to_ns_denom, sizeof (uint32_t));
-
-       copyout((void *)&one, (void *)proc_to_abs_numer, sizeof (uint32_t));
-       copyout((void *)&one, (void *)proc_to_abs_denom, sizeof (uint32_t));
+       while(1) {      
+               uint64_t read1 = mach_absolutetime_asleep;
+               uint64_t absolute = mach_absolute_time();
+               OSMemoryBarrier();
+               uint64_t read2 = mach_absolutetime_asleep;
+
+               if(__builtin_expect(read1 == read2, 1)) {
+                       return absolute + read1;
+               }
+       }
 }
 
 }
 
-kern_return_t
-mach_timebase_info(
-       mach_timebase_info_t    out_info)
+uint64_t
+mach_continuous_approximate_time(void)
 {
 {
-       mach_timebase_info_data_t       info;
-
-       clock_timebase_info(&info);
-
-       copyout((void *)&info, (void *)out_info, sizeof (info));
-
-       return (KERN_SUCCESS);
+       while(1) {
+               uint64_t read1 = mach_absolutetime_asleep;
+               uint64_t absolute = mach_approximate_time();
+               OSMemoryBarrier();
+               uint64_t read2 = mach_absolutetime_asleep;
+
+               if(__builtin_expect(read1 == read2, 1)) {
+                       return absolute + read1;
+               }
+       }
 }
 
 }
 
-kern_return_t
-mach_wait_until(
-       uint64_t                deadline)
-{
-       int                             wait_result;
-
-       wait_result = assert_wait((event_t)&mach_wait_until, THREAD_ABORTSAFE);
-       if (wait_result == THREAD_WAITING) {
-               thread_set_timer_deadline(deadline);
-               wait_result = thread_block(THREAD_CONTINUE_NULL);
-               if (wait_result != THREAD_TIMED_OUT)
-                       thread_cancel_timer();
-       }
+/*
+ * continuoustime_to_absolutetime
+ *
+ * Convert a continuous-time value to absolute time by subtracting
+ * mach_absolutetime_asleep.  A value that does not exceed
+ * mach_absolutetime_asleep yields 0 instead of wrapping around.
+ *
+ * Must be called with interrupts disabled.
+ * Returned value is only valid until the next update to
+ * mach_continuous_time.
+ */
+uint64_t
+continuoustime_to_absolutetime(uint64_t conttime) {
+       return (conttime > mach_absolutetime_asleep)?
+           (conttime - mach_absolutetime_asleep): 0;
+}
 
 
-       return ((wait_result == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS);
+/*
+ * absolutetime_to_continuoustime
+ *
+ * Convert an absolute-time value to continuous time by adding
+ * mach_absolutetime_asleep.
+ *
+ * Must be called with interrupts disabled.
+ * Returned value is only valid until the next update to
+ * mach_continuous_time.
+ */
+uint64_t
+absolutetime_to_continuoustime(uint64_t abstime) {
+       uint64_t        conttime = abstime + mach_absolutetime_asleep;
+
+       return conttime;
 }
 
 }
 
+#if    CONFIG_DTRACE
+
+/*
+ * clock_get_calendar_nanotime_nowait
+ *
+ * Description:        Non-blocking version of clock_get_calendar_nanotime()
+ *
+ * Notes:      This function operates by separately tracking calendar time
+ *             updates using a two element structure to copy the calendar
+ *             state, which may be asynchronously modified.  It utilizes
+ *             barrier instructions in the tracking process and in the local
+ *             stable snapshot process in order to ensure that a consistent
+ *             snapshot is used to perform the calculation.
+ */
 void
 void
-clock_adjtime(
-       int32_t         *secs,
-       int32_t         *microsecs)
+clock_get_calendar_nanotime_nowait(
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
 {
 {
-       uint32_t        interval;
-       spl_t           s;
+       int i = 0;
+       uint64_t                now;
+       struct unlocked_clock_calend stable;
+       struct bintime bt;
 
 
-       s = splclock();
-       simple_lock(&calend_adjlock);
+       for (;;) {
+               stable = flipflop[i];           /* take snapshot */
+
+               /*
+                * Use a barrier instruction to ensure atomicity.  We AND
+                * off the "in progress" bit to get the current generation
+                * count.
+                */
+               (void)hw_atomic_and(&stable.gen, ~(uint32_t)1);
 
 
-       interval = clock_set_calendar_adjtime(secs, microsecs);
-       if (interval != 0) {
-               if (calend_adjdeadline >= interval)
-                       calend_adjdeadline -= interval;
-               clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-                                                                                               &calend_adjdeadline);
+               /*
+                * If an update _is_ in progress, the generation count will be
+                * off by one, if it _was_ in progress, it will be off by two,
+                * and if we caught it at a good time, it will be equal (and
+                * our snapshot is therefore stable).
+                */
+               if (flipflop[i].gen == stable.gen)
+                       break;
 
 
-               timer_call_enter(&calend_adjcall, calend_adjdeadline);
+               /* Switch to the other element of the flipflop, and try again. */
+               i ^= 1;
        }
        }
-       else
-               timer_call_cancel(&calend_adjcall);
 
 
-       simple_unlock(&calend_adjlock);
-       splx(s);
+       now = mach_absolute_time();
+
+       bt = get_scaled_time(now);
+
+       bintime_add(&bt, &clock_calend.bintime);
+
+       bintime2nsclock(&bt, secs, nanosecs);
 }
 
 }
 
-static void
-calend_adjust_call(
-       timer_call_param_t              p0,
-       timer_call_param_t              p1)
+static void 
+clock_track_calend_nowait(void)
 {
 {
-       uint32_t        interval;
-       spl_t           s;
-
-       s = splclock();
-       simple_lock(&calend_adjlock);
+       int i;
 
 
-       interval = clock_adjust_calendar();
-       if (interval != 0) {
-               clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-                                                                                               &calend_adjdeadline);
+       for (i = 0; i < 2; i++) {
+               struct clock_calend tmp = clock_calend;
 
 
-               timer_call_enter(&calend_adjcall, calend_adjdeadline);
-       }
+               /*
+                * Set the low bit of the generation count; since we use a
+                * barrier instruction to do this, we are guaranteed that this
+                * will flag an update in progress to an async caller trying
+                * to examine the contents.
+                */
+               (void)hw_atomic_or(&flipflop[i].gen, 1);
 
 
-       simple_unlock(&calend_adjlock);
-       splx(s);
-}
+               flipflop[i].calend = tmp;
 
 
-void
-clock_wakeup_calendar(void)
-{
-       thread_call_enter(&calend_wakecall);
+               /*
+                * Increment the generation count to clear the low bit to
+                * signal completion.  If a caller compares the generation
+                * count after taking a copy while in progress, the count
+                * will be off by two.
+                */
+               (void)hw_atomic_add(&flipflop[i].gen, 1);
+       }
 }
 
 }
 
-static void
-calend_dowakeup(
-       thread_call_param_t             p0,
-       thread_call_param_t             p1)
-{
-       void            IOKitResetTime(void);
+#endif /* CONFIG_DTRACE */
 
 
-       IOKitResetTime();
-}