]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/clock.c
xnu-3789.1.32.tar.gz
[apple/xnu.git] / osfmk / kern / clock.c
index a0b3f9077539a0bdae4a89df0cf05943911256a5..6173d89b6d1534d5d540fa074cb36f35893625ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * @OSF_COPYRIGHT@
  */
 /*
- *     File:           kern/clock.c
- *     Purpose:        Routines for the creation and use of kernel
- *                     alarm clock services. This file and the ipc
- *                     routines in kern/ipc_clock.c constitute the
- *                     machine-independent clock service layer.
  */
 
-#include <mach_host.h>
-
 #include <mach/mach_types.h>
-#include <mach/boolean.h>
-#include <mach/processor_info.h>
-#include <mach/vm_param.h>
-
-#include <kern/cpu_number.h>
-#include <kern/misc_protos.h>
-#include <kern/lock.h>
-#include <kern/host.h>
+
 #include <kern/spl.h>
 #include <kern/sched_prim.h>
 #include <kern/thread.h>
-#include <kern/ipc_host.h>
 #include <kern/clock.h>
-#include <kern/zalloc.h>
+#include <kern/host_notify.h>
+#include <kern/thread_call.h>
+#include <libkern/OSAtomic.h>
 
-#include <ipc/ipc_types.h>
-#include <ipc/ipc_port.h>
+#include <IOKit/IOPlatformExpert.h>
+
+#include <machine/commpage.h>
 
 #include <mach/mach_traps.h>
-#include <mach/clock_reply.h>
 #include <mach/mach_time.h>
 
-#include <mach/clock_server.h>
-#include <mach/clock_priv_server.h>
-#include <mach/host_priv_server.h>
+#include <sys/kdebug.h>
 
-/*
- * Exported interface
- */
+uint32_t       hz_tick_interval = 1;
 
-#include <mach/clock_server.h>
-#include <mach/mach_host_server.h>
 
-/* local data declarations */
-decl_simple_lock_data(static,ClockLock)                /* clock system synchronization */
-static struct  zone            *alarm_zone;    /* zone for user alarms */
-static struct  alarm           *alrmfree;              /* alarm free list pointer */
-static struct  alarm           *alrmdone;              /* alarm done list pointer */
-static long                                    alrm_seqno;             /* uniquely identifies alarms */
-static thread_call_data_t      alarm_deliver;
+decl_simple_lock_data(,clock_lock)
 
-decl_simple_lock_data(static,calend_adjlock)
+#define clock_lock()   \
+       simple_lock(&clock_lock)
 
-static timer_call_data_t       calend_adjcall;
-static uint64_t                                calend_adjdeadline;
+#define clock_unlock() \
+       simple_unlock(&clock_lock)
 
-static thread_call_data_t      calend_wakecall;
+#define clock_lock_init()      \
+       simple_lock_init(&clock_lock, 0)
 
-/* external declarations */
-extern struct clock    clock_list[];
-extern int             clock_count;
+#ifdef kdp_simple_lock_is_acquired
+boolean_t kdp_clock_is_locked()
+{
+       return kdp_simple_lock_is_acquired(&clock_lock);
+}
+#endif
 
-/* local clock subroutines */
-static
-void   flush_alarms(
-                       clock_t                 clock);
+/*
+ *     Time of day (calendar) variables.
+ *
+ *     Algorithm:
+ *
+ *     TOD <- (seconds + epoch, fraction) <- CONV(current absolute time + offset)
+ *
+ *     where CONV converts absolute time units into seconds and a fraction.
+ */
+static struct clock_calend {
+       uint64_t        epoch;
+       uint64_t        offset;
+       uint64_t    epoch_absolute;
 
-static
-void   post_alarm(
-                       clock_t                 clock,
-                       alarm_t                 alarm);
+       int32_t         adjdelta;       /* Nanosecond time delta for this adjustment period */
+       uint64_t        adjstart;       /* Absolute time value for start of this adjustment period */
+       uint32_t        adjoffset;      /* Absolute time offset for this adjustment period as absolute value */
+} clock_calend;
 
-static
-int            check_time(
-                       alarm_type_t    alarm_type,
-                       mach_timespec_t *alarm_time,
-                       mach_timespec_t *clock_time);
+#if    CONFIG_DTRACE
 
-static
-void   clock_alarm_deliver(
-                       thread_call_param_t             p0,
-                       thread_call_param_t             p1);
+/*
+ *     Unlocked calendar flipflop; this is used to track a clock_calend such
+ *     that we can safely access a snapshot of a valid clock_calend structure
+ *     without needing to take any locks to do it.
+ *
+ *     The trick is to use a generation count and set the low bit when it is
+ *     being updated/read; by doing this, we guarantee, through use of the
+ *     hw_atomic functions, that the generation is incremented when the bit
+ *     is cleared atomically (by using a 1 bit add).
+ */
+static struct unlocked_clock_calend {
+       struct clock_calend     calend;         /* copy of calendar */
+       uint32_t                gen;            /* generation count */
+} flipflop[ 2];
 
-static
-void   calend_adjust_call(
-                       timer_call_param_t      p0,
-                       timer_call_param_t      p1);
+static void clock_track_calend_nowait(void);
 
-static
-void   calend_dowakeup(
-                       thread_call_param_t             p0,
-                       thread_call_param_t             p1);
+#endif
 
 /*
- *     Macros to lock/unlock clock system.
+ *     Calendar adjustment variables and values.
  */
-#define LOCK_CLOCK(s)                  \
-       s = splclock();                 \
-       simple_lock(&ClockLock);
+#define calend_adjperiod       (NSEC_PER_SEC / 100)    /* adjustment period, ns */
+#define calend_adjskew         (40 * NSEC_PER_USEC)    /* "standard" skew, ns / period */
+#define        calend_adjbig           (NSEC_PER_SEC)                  /* use 10x skew above adjbig ns */
 
-#define UNLOCK_CLOCK(s)                        \
-       simple_unlock(&ClockLock);      \
-       splx(s);
+static int64_t                         calend_adjtotal;                /* Nanosecond remaining total adjustment */
+static uint64_t                                calend_adjdeadline;             /* Absolute time value for next adjustment period */
+static uint32_t                                calend_adjinterval;             /* Absolute time interval of adjustment period */
+
+static timer_call_data_t       calend_adjcall;
+static uint32_t                                calend_adjactive;
+
+static uint32_t                calend_set_adjustment(
+                                               long                    *secs,
+                                               int                             *microsecs);
+
+static void                    calend_adjust_call(void);
+static uint32_t                calend_adjust(void);
+
+void _clock_delay_until_deadline(uint64_t              interval,
+                                                                uint64_t               deadline);
+void _clock_delay_until_deadline_with_leeway(uint64_t          interval,
+                                                                                        uint64_t               deadline,
+                                                                                        uint64_t               leeway);
+
+/* Boottime epoch: whole seconds, plus the microseconds fraction */
+static uint64_t clock_boottime;
+static uint32_t clock_boottime_usec;
+
+#define TIME_ADD(rsecs, secs, rfrac, frac, unit)       \
+MACRO_BEGIN                                                                                    \
+       if (((rfrac) += (frac)) >= (unit)) {                    \
+               (rfrac) -= (unit);                                                      \
+               (rsecs) += 1;                                                           \
+       }                                                                                               \
+       (rsecs) += (secs);                                                              \
+MACRO_END
+
+#define TIME_SUB(rsecs, secs, rfrac, frac, unit)       \
+MACRO_BEGIN                                                                                    \
+       if ((int)((rfrac) -= (frac)) < 0) {                             \
+               (rfrac) += (unit);                                                      \
+               (rsecs) -= 1;                                                           \
+       }                                                                                               \
+       (rsecs) -= (secs);                                                              \
+MACRO_END
 
 /*
  *     clock_config:
@@ -141,37 +168,11 @@ void      calend_dowakeup(
 void
 clock_config(void)
 {
-       clock_t                 clock;
-       register int    i;
-
-       assert(cpu_number() == master_cpu);
-
-       simple_lock_init(&ClockLock, 0);
-       thread_call_setup(&alarm_deliver, clock_alarm_deliver, NULL);
-
-       simple_lock_init(&calend_adjlock, 0);
-       timer_call_setup(&calend_adjcall, calend_adjust_call, NULL);
+       clock_lock_init();
 
-       thread_call_setup(&calend_wakecall, calend_dowakeup, NULL);
+       timer_call_setup(&calend_adjcall, (timer_call_func_t)calend_adjust_call, NULL);
 
-       /*
-        * Configure clock devices.
-        */
-       for (i = 0; i < clock_count; i++) {
-               clock = &clock_list[i];
-               if (clock->cl_ops) {
-                       if ((*clock->cl_ops->c_config)() == 0)
-                               clock->cl_ops = 0;
-               }
-       }
-
-       /*
-        * Initialize the timer callouts.
-        */
-       timer_call_initialize();
-
-       /* start alarm sequence numbers at 0 */
-       alrm_seqno = 0;
+       clock_oldconfig();
 }
 
 /*
@@ -182,696 +183,704 @@ clock_config(void)
 void
 clock_init(void)
 {
-       clock_t                 clock;
-       register int    i;
-
-       /*
-        * Initialize basic clock structures.
-        */
-       for (i = 0; i < clock_count; i++) {
-               clock = &clock_list[i];
-               if (clock->cl_ops && clock->cl_ops->c_init)
-                       (*clock->cl_ops->c_init)();
-       }
+       clock_oldinit();
 }
 
 /*
- * Called by machine dependent code
- * to initialize areas dependent on the
- * timebase value.  May be called multiple
- * times during start up.
+ *     clock_timebase_init:
+ *
+ *     Called by machine dependent code
+ *     to initialize areas dependent on the
+ *     timebase value.  May be called multiple
+ *     times during start up.
  */
 void
 clock_timebase_init(void)
 {
+       uint64_t        abstime;
+
+       nanoseconds_to_absolutetime(calend_adjperiod, &abstime);
+       calend_adjinterval = (uint32_t)abstime;
+
+       nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &abstime);
+       hz_tick_interval = (uint32_t)abstime;
+
        sched_timebase_init();
 }
 
 /*
- * Initialize the clock ipc service facility.
+ *     mach_timebase_info_trap:
+ *
+ *     User trap returns timebase constant.
  */
-void
-clock_service_create(void)
+kern_return_t
+mach_timebase_info_trap(
+       struct mach_timebase_info_trap_args *args)
 {
-       clock_t                 clock;
-       register int    i;
+       mach_vm_address_t                       out_info_addr = args->info;
+       mach_timebase_info_data_t       info;
 
-       /*
-        * Initialize ipc clock services.
-        */
-       for (i = 0; i < clock_count; i++) {
-               clock = &clock_list[i];
-               if (clock->cl_ops) {
-                       ipc_clock_init(clock);
-                       ipc_clock_enable(clock);
-               }
-       }
+       clock_timebase_info(&info);
 
-       /*
-        * Perform miscellaneous late
-        * initialization.
-        */
-       i = sizeof(struct alarm);
-       alarm_zone = zinit(i, (4096/i)*i, 10*i, "alarms");
+       copyout((void *)&info, out_info_addr, sizeof (info));
+
+       return (KERN_SUCCESS);
 }
 
 /*
- * Get the service port on a clock.
+ *     Calendar routines.
  */
-kern_return_t
-host_get_clock_service(
-       host_t                  host,
-       clock_id_t              clock_id,
-       clock_t                 *clock)         /* OUT */
-{
-       if (host == HOST_NULL || clock_id < 0 || clock_id >= clock_count) {
-               *clock = CLOCK_NULL;
-               return (KERN_INVALID_ARGUMENT);
-       }
-
-       *clock = &clock_list[clock_id];
-       if ((*clock)->cl_ops == 0)
-               return (KERN_FAILURE);
-       return (KERN_SUCCESS);
-}
 
 /*
- * Get the control port on a clock.
+ *     clock_get_calendar_microtime:
+ *
+ *     Returns the current calendar value,
+ *     microseconds as the fraction.
  */
-kern_return_t
-host_get_clock_control(
-       host_priv_t             host_priv,
-       clock_id_t              clock_id,
-       clock_t                 *clock)         /* OUT */
-{
-       if (host_priv == HOST_PRIV_NULL || clock_id < 0 || clock_id >= clock_count) {
-               *clock = CLOCK_NULL;
-               return (KERN_INVALID_ARGUMENT);
+void
+clock_get_calendar_microtime(
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
+{
+       clock_get_calendar_absolute_and_microtime(secs, microsecs, NULL);
+}
+
+static void
+clock_get_calendar_absolute_and_microtime_locked(
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs,
+       uint64_t                *abstime)
+{
+       uint64_t now  = mach_absolute_time();
+       if (abstime)
+               *abstime = now;
+
+       if (clock_calend.adjdelta < 0) {
+               uint32_t        t32;
+
+               /*
+                * Since offset is decremented during a negative adjustment,
+                * ensure that time increases monotonically without going
+                * temporarily backwards.
+                * If the delta has not yet passed, now is set to the start
+                * of the current adjustment period; otherwise, we're between
+                * the expiry of the delta and the next call to calend_adjust(),
+                * and we offset accordingly.
+                */
+               if (now > clock_calend.adjstart) {
+                       t32 = (uint32_t)(now - clock_calend.adjstart);
+
+                       if (t32 > clock_calend.adjoffset)
+                               now -= clock_calend.adjoffset;
+                       else
+                               now = clock_calend.adjstart;
+               }
        }
 
-       *clock = &clock_list[clock_id];
-       if ((*clock)->cl_ops == 0)
-               return (KERN_FAILURE);
-       return (KERN_SUCCESS);
+       now += clock_calend.offset;
+
+       absolutetime_to_microtime(now, secs, microsecs);
+
+       *secs += (clock_sec_t)clock_calend.epoch;
 }
 
 /*
- * Get the current clock time.
+ *     clock_get_calendar_absolute_and_microtime:
+ *
+ *     Returns the current calendar value,
+ *     microseconds as the fraction. Also
+ *     returns mach_absolute_time if abstime
+ *     is not NULL.
  */
-kern_return_t
-clock_get_time(
-       clock_t                 clock,
-       mach_timespec_t *cur_time)      /* OUT */
+void
+clock_get_calendar_absolute_and_microtime(
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs,
+       uint64_t                *abstime)
 {
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       return ((*clock->cl_ops->c_gettime)(cur_time));
-}
+       spl_t                   s;
 
-/*
- * Get clock attributes.
- */
-kern_return_t
-clock_get_attributes(
-       clock_t                                 clock,
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
-{
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_getattr)
-               return(clock->cl_ops->c_getattr(flavor, attr, count));
-       else
-               return (KERN_FAILURE);
+       s = splclock();
+       clock_lock();
+
+       clock_get_calendar_absolute_and_microtime_locked(secs, microsecs, abstime);
+
+       clock_unlock();
+       splx(s);
 }
 
 /*
- * Set the current clock time.
+ *     clock_get_calendar_nanotime:
+ *
+ *     Returns the current calendar value,
+ *     nanoseconds as the fraction.
+ *
+ *     Since we do not have an interface to
+ *     set the calendar with resolution greater
+ *     than a microsecond, we honor that here.
  */
-kern_return_t
-clock_set_time(
-       clock_t                 clock,
-       mach_timespec_t new_time)
+void
+clock_get_calendar_nanotime(
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
 {
-       mach_timespec_t *clock_time;
+       spl_t                   s;
 
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_settime == NULL)
-               return (KERN_FAILURE);
-       clock_time = &new_time;
-       if (BAD_MACH_TIMESPEC(clock_time))
-               return (KERN_INVALID_VALUE);
+       s = splclock();
+       clock_lock();
 
-       /*
-        * Flush all outstanding alarms.
-        */
-       flush_alarms(clock);
+       clock_get_calendar_absolute_and_microtime_locked(secs, nanosecs, NULL);
 
-       /*
-        * Set the new time.
-        */
-       return (clock->cl_ops->c_settime(clock_time));
+       *nanosecs *= NSEC_PER_USEC;
+
+       clock_unlock();
+       splx(s);
 }
 
 /*
- * Set the clock alarm resolution.
+ *     clock_gettimeofday:
+ *
+ *     Kernel interface for commpage implementation of
+ *     gettimeofday() syscall.
+ *
+ *     Returns the current calendar value, and updates the
+ *     commpage info as appropriate.  Because most calls to
+ *     gettimeofday() are handled in user mode by the commpage,
+ *     this routine should be used infrequently.
  */
-kern_return_t
-clock_set_attributes(
-       clock_t                                 clock,
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  count)
-{
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_setattr)
-               return (clock->cl_ops->c_setattr(flavor, attr, count));
-       else
-               return (KERN_FAILURE);
+void
+clock_gettimeofday(
+       clock_sec_t             *secs,
+       clock_usec_t    *microsecs)
+{
+       clock_gettimeofday_and_absolute_time(secs, microsecs, NULL);
 }
 
-/*
- * Setup a clock alarm.
- */
-kern_return_t
-clock_alarm(
-       clock_t                                 clock,
-       alarm_type_t                    alarm_type,
-       mach_timespec_t                 alarm_time,
-       ipc_port_t                              alarm_port,
-       mach_msg_type_name_t    alarm_port_type)
-{
-       alarm_t                                 alarm;
-       mach_timespec_t                 clock_time;
-       int                                             chkstat;
-       kern_return_t                   reply_code;
-       spl_t                                   s;
-
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_setalrm == 0)
-               return (KERN_FAILURE);
-       if (IP_VALID(alarm_port) == 0)
-               return (KERN_INVALID_CAPABILITY);
+void
+clock_gettimeofday_and_absolute_time(
+       clock_sec_t             *secs,
+       clock_usec_t    *microsecs,
+       uint64_t                *mach_time)
+{
+       uint64_t                now;
+       spl_t                   s;
 
-       /*
-        * Check alarm parameters. If parameters are invalid,
-        * send alarm message immediately.
-        */
-       (*clock->cl_ops->c_gettime)(&clock_time);
-       chkstat = check_time(alarm_type, &alarm_time, &clock_time);
-       if (chkstat <= 0) {
-               reply_code = (chkstat < 0 ? KERN_INVALID_VALUE : KERN_SUCCESS);
-               clock_alarm_reply(alarm_port, alarm_port_type,
-                                 reply_code, alarm_type, clock_time);
-               return (KERN_SUCCESS);
+       s = splclock();
+       clock_lock();
+
+       now = mach_absolute_time();
+
+       if (clock_calend.adjdelta >= 0) {
+               clock_gettimeofday_set_commpage(now, clock_calend.epoch, clock_calend.offset, secs, microsecs);
        }
+       else {
+               uint32_t        t32;
 
-       /*
-        * Get alarm and add to clock alarm list.
-        */
+               if (now > clock_calend.adjstart) {
+                       t32 = (uint32_t)(now - clock_calend.adjstart);
 
-       LOCK_CLOCK(s);
-       if ((alarm = alrmfree) == 0) {
-               UNLOCK_CLOCK(s);
-               alarm = (alarm_t) zalloc(alarm_zone);
-               if (alarm == 0)
-                       return (KERN_RESOURCE_SHORTAGE);
-               LOCK_CLOCK(s);
+                       if (t32 > clock_calend.adjoffset)
+                               now -= clock_calend.adjoffset;
+                       else
+                               now = clock_calend.adjstart;
+               }
+
+               now += clock_calend.offset;
+
+               absolutetime_to_microtime(now, secs, microsecs);
+
+               *secs += (clock_sec_t)clock_calend.epoch;
        }
-       else
-               alrmfree = alarm->al_next;
-
-       alarm->al_status = ALARM_CLOCK;
-       alarm->al_time = alarm_time;
-       alarm->al_type = alarm_type;
-       alarm->al_port = alarm_port;
-       alarm->al_port_type = alarm_port_type;
-       alarm->al_clock = clock;
-       alarm->al_seqno = alrm_seqno++;
-       post_alarm(clock, alarm);
-       UNLOCK_CLOCK(s);
 
-       return (KERN_SUCCESS);
+       clock_unlock();
+       splx(s);
+
+       if (mach_time) {
+               *mach_time = now;
+       }
 }
 
 /*
- * Sleep on a clock. System trap. User-level libmach clock_sleep
- * interface call takes a mach_timespec_t sleep_time argument which it
- * converts to sleep_sec and sleep_nsec arguments which are then
- * passed to clock_sleep_trap.
+ *     clock_set_calendar_microtime:
+ *
+ *     Sets the current calendar value by
+ *     recalculating the epoch and offset
+ *     from the system clock.
+ *
+ *     Also adjusts the boottime to keep the
+ *     value consistent, writes the new
+ *     calendar value to the platform clock,
+ *     and sends calendar change notifications.
  */
-kern_return_t
-clock_sleep_trap(
-       struct clock_sleep_trap_args *args)
-{
-       mach_port_name_t        clock_name = args->clock_name;
-       sleep_type_t            sleep_type = args->sleep_type;
-       int                                     sleep_sec = args->sleep_sec;
-       int                                     sleep_nsec = args->sleep_nsec;
-       mach_vm_address_t       wakeup_time_addr = args->wakeup_time;  
-       clock_t                         clock;
-       mach_timespec_t         swtime;
-       kern_return_t           rvalue;
+void
+clock_set_calendar_microtime(
+       clock_sec_t                     secs,
+       clock_usec_t            microsecs)
+{
+       clock_sec_t                     sys;
+       clock_usec_t            microsys;
+       uint64_t                        absolutesys;
+       clock_sec_t                     newsecs;
+       clock_sec_t                     oldsecs;
+    clock_usec_t        newmicrosecs;
+       clock_usec_t            oldmicrosecs;
+       uint64_t                        commpage_value;
+       spl_t                           s;
+
+    newsecs = secs;
+    newmicrosecs = microsecs;
+
+       s = splclock();
+       clock_lock();
+
+       commpage_disable_timestamp();
 
        /*
-        * Convert the trap parameters.
+        *      Adjust the boottime based on the delta.
         */
-       if (clock_name != MACH_PORT_NULL)
-               clock = port_name_to_clock(clock_name);
-       else
-               clock = &clock_list[SYSTEM_CLOCK];
-
-       swtime.tv_sec  = sleep_sec;
-       swtime.tv_nsec = sleep_nsec;
+       clock_get_calendar_absolute_and_microtime_locked(&oldsecs, &oldmicrosecs, &absolutesys);
+       if (oldsecs < secs || (oldsecs == secs && oldmicrosecs < microsecs)){
+               // moving forwards
+               long deltasecs = secs, deltamicrosecs = microsecs;
+               TIME_SUB(deltasecs, oldsecs, deltamicrosecs, oldmicrosecs, USEC_PER_SEC);
+               TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+       } else {
+               // moving backwards
+               long deltasecs = oldsecs, deltamicrosecs = oldmicrosecs;
+               TIME_SUB(deltasecs, secs, deltamicrosecs, microsecs, USEC_PER_SEC);
+               TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC);
+       }
+       commpage_value = clock_boottime * USEC_PER_SEC + clock_boottime_usec;
 
        /*
-        * Call the actual clock_sleep routine.
+        *      Calculate the new calendar epoch based on
+        *      the new value and the system clock.
         */
-       rvalue = clock_sleep_internal(clock, sleep_type, &swtime);
+       absolutetime_to_microtime(absolutesys, &sys, &microsys);
+       TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC);
 
        /*
-        * Return current time as wakeup time.
+        *      Set the new calendar epoch.
         */
-       if (rvalue != KERN_INVALID_ARGUMENT && rvalue != KERN_FAILURE) {
-               copyout((char *)&swtime, wakeup_time_addr, sizeof(mach_timespec_t));
-       }
-       return (rvalue);
-}      
+       clock_calend.epoch = secs;
 
-/*
- * Kernel internally callable clock sleep routine. The calling
- * thread is suspended until the requested sleep time is reached.
- */
-kern_return_t
-clock_sleep_internal(
-       clock_t                         clock,
-       sleep_type_t            sleep_type,
-       mach_timespec_t         *sleep_time)
-{
-       alarm_t                         alarm;
-       mach_timespec_t         clock_time;
-       kern_return_t           rvalue;
-       int                                     chkstat;
-       spl_t                           s;
+       nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
 
-       if (clock == CLOCK_NULL)
-               return (KERN_INVALID_ARGUMENT);
-       if (clock->cl_ops->c_setalrm == 0)
-               return (KERN_FAILURE);
+       clock_interval_to_absolutetime_interval((uint32_t) secs, NSEC_PER_SEC, &clock_calend.epoch_absolute);
+       clock_calend.epoch_absolute += clock_calend.offset;
 
        /*
-        * Check sleep parameters. If parameters are invalid
-        * return an error, otherwise post alarm request.
+        *      Cancel any adjustment in progress.
         */
-       (*clock->cl_ops->c_gettime)(&clock_time);
-
-       chkstat = check_time(sleep_type, sleep_time, &clock_time);
-       if (chkstat < 0)
-               return (KERN_INVALID_VALUE);
-       rvalue = KERN_SUCCESS;
-       if (chkstat > 0) {
-               wait_result_t wait_result;
+       calend_adjtotal = clock_calend.adjdelta = 0;
 
-               /*
-                * Get alarm and add to clock alarm list.
-                */
+       clock_unlock();
 
-               LOCK_CLOCK(s);
-               if ((alarm = alrmfree) == 0) {
-                       UNLOCK_CLOCK(s);
-                       alarm = (alarm_t) zalloc(alarm_zone);
-                       if (alarm == 0)
-                               return (KERN_RESOURCE_SHORTAGE);
-                       LOCK_CLOCK(s);
-               }
-               else
-                       alrmfree = alarm->al_next;
+       /*
+        *      Set the new value for the platform clock.
+        */
+       PESetUTCTimeOfDay(newsecs, newmicrosecs);
 
-               /*
-                * Wait for alarm to occur.
-                */
-               wait_result = assert_wait((event_t)alarm, THREAD_ABORTSAFE);
-               if (wait_result == THREAD_WAITING) {
-                       alarm->al_time = *sleep_time;
-                       alarm->al_status = ALARM_SLEEP;
-                       post_alarm(clock, alarm);
-                       UNLOCK_CLOCK(s);
+       splx(s);
 
-                       wait_result = thread_block(THREAD_CONTINUE_NULL);
+       commpage_update_boottime(commpage_value);
 
-                       /*
-                        * Note if alarm expired normally or whether it
-                        * was aborted. If aborted, delete alarm from
-                        * clock alarm list. Return alarm to free list.
-                        */
-                       LOCK_CLOCK(s);
-                       if (alarm->al_status != ALARM_DONE) {
-                               assert(wait_result != THREAD_AWAKENED);
-                               if (((alarm->al_prev)->al_next = alarm->al_next) != NULL)
-                                       (alarm->al_next)->al_prev = alarm->al_prev;
-                               rvalue = KERN_ABORTED;
-                       }
-                       *sleep_time = alarm->al_time;
-                       alarm->al_status = ALARM_FREE;
-               } else {
-                       assert(wait_result == THREAD_INTERRUPTED);
-                       assert(alarm->al_status == ALARM_FREE);
-                       rvalue = KERN_ABORTED;
-               }
-               alarm->al_next = alrmfree;
-               alrmfree = alarm;
-               UNLOCK_CLOCK(s);
-       }
-       else
-               *sleep_time = clock_time;
+       /*
+        *      Send host notifications.
+        */
+       host_notify_calendar_change();
+       host_notify_calendar_set();
 
-       return (rvalue);
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
 }
 
 /*
- * CLOCK INTERRUPT SERVICE ROUTINES.
+ *     clock_initialize_calendar:
+ *
+ *     Set the calendar and related clocks
+ *     from the platform clock at boot or
+ *     wake event.
+ *
+ *     Also sends host notifications.
  */
 
-/*
- * Service clock alarm interrupts. Called from machine dependent
- * layer at splclock(). The clock_id argument specifies the clock,
- * and the clock_time argument gives that clock's current time.
- */
+uint64_t mach_absolutetime_asleep;
+uint64_t mach_absolutetime_last_sleep;
+
 void
-clock_alarm_intr(
-       clock_id_t                      clock_id,
-       mach_timespec_t         *clock_time)
-{
-       clock_t                         clock;
-       register alarm_t        alrm1;
-       register alarm_t        alrm2;
-       mach_timespec_t         *alarm_time;
+clock_initialize_calendar(void)
+{
+       clock_sec_t                     sys;  // sleepless time since boot in seconds
+       clock_sec_t                     secs; // Current UTC time
+       clock_sec_t                     utc_offset_secs; // Difference between current UTC time and sleepless time since boot
+       clock_usec_t            microsys;  
+       clock_usec_t            microsecs; 
+       clock_usec_t            utc_offset_microsecs; 
+       uint64_t                        new_epoch; // utc_offset_secs in mach absolute time units
        spl_t                           s;
 
-       clock = &clock_list[clock_id];
+       PEGetUTCTimeOfDay(&secs, &microsecs);
 
-       /*
-        * Update clock alarm list. All alarms that are due are moved
-        * to the alarmdone list to be serviced by the alarm_thread.
-        */
+       s = splclock();
+       clock_lock();
 
-       LOCK_CLOCK(s);
-       alrm1 = (alarm_t) &clock->cl_alarm;
-       while ((alrm2 = alrm1->al_next) != NULL) {
-               alarm_time = &alrm2->al_time;
-               if (CMP_MACH_TIMESPEC(alarm_time, clock_time) > 0)
-                       break;
+       commpage_disable_timestamp();
 
+       if ((long)secs >= (long)clock_boottime) {
                /*
-                * Alarm has expired, so remove it from the
-                * clock alarm list.
-                */  
-               if ((alrm1->al_next = alrm2->al_next) != NULL)
-                       (alrm1->al_next)->al_prev = alrm1;
+                *      Initialize the boot time based on the platform clock.
+                */
+               if (clock_boottime == 0){
+                       clock_boottime = secs;
+                       clock_boottime_usec = microsecs;
+                       commpage_update_boottime(clock_boottime * USEC_PER_SEC + clock_boottime_usec);
+               }
 
                /*
-                * If a clock_sleep() alarm, wakeup the thread
-                * which issued the clock_sleep() call.
+                *      Calculate the new calendar epoch based on
+                *      the platform clock and the system clock.
                 */
-               if (alrm2->al_status == ALARM_SLEEP) {
-                       alrm2->al_next = 0;
-                       alrm2->al_status = ALARM_DONE;
-                       alrm2->al_time = *clock_time;
-                       thread_wakeup((event_t)alrm2);
-               }
+               clock_get_system_microtime(&sys, &microsys);
+               utc_offset_secs = secs;
+               utc_offset_microsecs = microsecs;
 
-               /*
-                * If a clock_alarm() alarm, place the alarm on
-                * the alarm done list and schedule the alarm
-                * delivery mechanism.
+               // This macro mutates utc_offset_secs and utc_offset_microsecs
+               TIME_SUB(utc_offset_secs, sys, utc_offset_microsecs, microsys, USEC_PER_SEC);
+
+               /*
+                *      Set the new calendar epoch.
                 */
-               else {
-                       assert(alrm2->al_status == ALARM_CLOCK);
-                       if ((alrm2->al_next = alrmdone) != NULL)
-                               alrmdone->al_prev = alrm2;
-                       else
-                               thread_call_enter(&alarm_deliver);
-                       alrm2->al_prev = (alarm_t) &alrmdone;
-                       alrmdone = alrm2;
-                       alrm2->al_status = ALARM_DONE;
-                       alrm2->al_time = *clock_time;
+
+               clock_calend.epoch = utc_offset_secs;
+
+               nanoseconds_to_absolutetime((uint64_t)utc_offset_microsecs * NSEC_PER_USEC, &clock_calend.offset);
+
+               clock_interval_to_absolutetime_interval((uint32_t) utc_offset_secs, NSEC_PER_SEC, &new_epoch);
+               new_epoch += clock_calend.offset;
+
+               if (clock_calend.epoch_absolute)
+               {
+                       /* new_epoch is the difference between absolute_time and utc_time;
+                        * this value remains constant until the system sleeps.
+                        * After a sleep, the difference grows by the time the system slept.
+                        * epoch_absolute is the previous difference between the two values,
+                        * so the difference in the differences is the duration of the last sleep.
+                        */
+
+                       if(new_epoch > clock_calend.epoch_absolute) {
+                               mach_absolutetime_last_sleep = new_epoch - clock_calend.epoch_absolute;
+                       }
+                       else {
+                               mach_absolutetime_last_sleep = 0;
+                       }
+                       mach_absolutetime_asleep += mach_absolutetime_last_sleep;
+                       KERNEL_DEBUG_CONSTANT(
+                                 MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE,
+                                 (uintptr_t) mach_absolutetime_last_sleep,
+                                 (uintptr_t) mach_absolutetime_asleep,
+                                 (uintptr_t) (mach_absolutetime_last_sleep >> 32),
+                                 (uintptr_t) (mach_absolutetime_asleep >> 32),
+                                 0);
                }
+               clock_calend.epoch_absolute = new_epoch;
+
+               /*
+                *       Cancel any adjustment in progress.
+                */
+               calend_adjtotal = clock_calend.adjdelta = 0;
        }
 
+       commpage_update_mach_continuous_time(mach_absolutetime_asleep);
+       adjust_cont_time_thread_calls();
+
+       clock_unlock();
+       splx(s);
+
        /*
-        * Setup the clock dependent layer to deliver another
-        * interrupt for the next pending alarm.
+        *      Send host notifications.
         */
-       if (alrm2)
-               (*clock->cl_ops->c_setalrm)(alarm_time);
-       UNLOCK_CLOCK(s);
+       host_notify_calendar_change();
+       
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
 }
 
 /*
- * ALARM DELIVERY ROUTINES.
+ *     clock_get_boottime_nanotime:
+ *
+ *     Return the boottime, used by sysctl.
  */
-
-static void
-clock_alarm_deliver(
-       __unused thread_call_param_t            p0,
-       __unused thread_call_param_t            p1)
+void
+clock_get_boottime_nanotime(
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
 {
-       register alarm_t        alrm;
-       kern_return_t           code;
-       spl_t                           s;
+       spl_t   s;
 
-       LOCK_CLOCK(s);
-       while ((alrm = alrmdone) != NULL) {
-               if ((alrmdone = alrm->al_next) != NULL)
-                       alrmdone->al_prev = (alarm_t) &alrmdone;
-               UNLOCK_CLOCK(s);
-
-               code = (alrm->al_status == ALARM_DONE? KERN_SUCCESS: KERN_ABORTED);
-               if (alrm->al_port != IP_NULL) {
-                       /* Deliver message to designated port */
-                       if (IP_VALID(alrm->al_port)) {
-                               clock_alarm_reply(alrm->al_port, alrm->al_port_type, code,
-                                                                                               alrm->al_type, alrm->al_time);
-                       }
+       s = splclock();
+       clock_lock();
 
-                       LOCK_CLOCK(s);
-                       alrm->al_status = ALARM_FREE;
-                       alrm->al_next = alrmfree;
-                       alrmfree = alrm;
-               }
-               else
-                       panic("clock_alarm_deliver");
-       }
+       *secs = (clock_sec_t)clock_boottime;
+       *nanosecs = (clock_nsec_t)clock_boottime_usec * NSEC_PER_USEC;
 
-       UNLOCK_CLOCK(s);
+       clock_unlock();
+       splx(s);
 }
 
 /*
- * CLOCK PRIVATE SERVICING SUBROUTINES.
+ *     clock_get_boottime_microtime:
+ *
+ *     Return the boottime, used by sysctl.
  */
+void
+clock_get_boottime_microtime(
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
+{
+       spl_t   s;
+
+       s = splclock();
+       clock_lock();
+
+       *secs = (clock_sec_t)clock_boottime;
+       *microsecs = (clock_nsec_t)clock_boottime_usec;
+
+       clock_unlock();
+       splx(s);
+}
 
 /*
- * Flush all pending alarms on a clock. All alarms
- * are activated and timestamped correctly, so any
- * programs waiting on alarms/threads will proceed
- * with accurate information.
+ *     clock_adjtime:
+ *
+ *     Interface to adjtime() syscall.
+ *
+ *     Calculates adjustment variables and
+ *     initiates adjustment.
  */
-static
 void
-flush_alarms(
-       clock_t                         clock)
+clock_adjtime(
+       long            *secs,
+       int                     *microsecs)
 {
-       register alarm_t        alrm1, alrm2;
-       spl_t                           s;
+       uint32_t        interval;
+       spl_t           s;
 
-       /*
-        * Flush all outstanding alarms.
-        */
-       LOCK_CLOCK(s);
-       alrm1 = (alarm_t) &clock->cl_alarm;
-       while ((alrm2 = alrm1->al_next) != NULL) {
-               /*
-                * Remove alarm from the clock alarm list.
-                */  
-               if ((alrm1->al_next = alrm2->al_next) != NULL)
-                       (alrm1->al_next)->al_prev = alrm1;
+       s = splclock();
+       clock_lock();
 
-               /*
-                * If a clock_sleep() alarm, wakeup the thread
-                * which issued the clock_sleep() call.
-                */
-               if (alrm2->al_status == ALARM_SLEEP) {
-                       alrm2->al_next = 0;
-                       thread_wakeup((event_t)alrm2);
-               }
-               else {
-                       /*
-                        * If a clock_alarm() alarm, place the alarm on
-                        * the alarm done list and wakeup the dedicated
-                        * kernel alarm_thread to service the alarm.
-                        */
-                       assert(alrm2->al_status == ALARM_CLOCK);
-                       if ((alrm2->al_next = alrmdone) != NULL)
-                               alrmdone->al_prev = alrm2;
-                       else
-                               thread_wakeup((event_t)&alrmdone);
-                       alrm2->al_prev = (alarm_t) &alrmdone;
-                       alrmdone = alrm2;
-               }
+       interval = calend_set_adjustment(secs, microsecs);
+       if (interval != 0) {
+               calend_adjdeadline = mach_absolute_time() + interval;
+               if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_SYS_CRITICAL))
+                       calend_adjactive++;
        }
-       UNLOCK_CLOCK(s);
+       else
+       if (timer_call_cancel(&calend_adjcall))
+               calend_adjactive--;
+
+       clock_unlock();
+       splx(s);
 }
 
-/*
- * Post an alarm on a clock's active alarm list. The alarm is
- * inserted in time-order into the clock's active alarm list.
- * Always called from within a LOCK_CLOCK() code section.
- */
-static
-void
-post_alarm(
-       clock_t                         clock,
-       alarm_t                         alarm)
+static uint32_t
+calend_set_adjustment(
+       long                    *secs,
+       int                             *microsecs)
 {
-       register alarm_t        alrm1, alrm2;
-       mach_timespec_t         *alarm_time;
-       mach_timespec_t         *queue_time;
+       uint64_t                now, t64;
+       int64_t                 total, ototal;
+       uint32_t                interval = 0;
 
-       /*
-        * Traverse alarm list until queue time is greater
-        * than alarm time, then insert alarm.
+       /* 
+        * Compute the total adjustment time in nanoseconds.
         */
-       alarm_time = &alarm->al_time;
-       alrm1 = (alarm_t) &clock->cl_alarm;
-       while ((alrm2 = alrm1->al_next) != NULL) {
-               queue_time = &alrm2->al_time;
-               if (CMP_MACH_TIMESPEC(queue_time, alarm_time) > 0)
-                       break;
-               alrm1 = alrm2;
-       }
-       alrm1->al_next = alarm;
-       alarm->al_next = alrm2;
-       alarm->al_prev = alrm1;
-       if (alrm2)
-               alrm2->al_prev  = alarm;
+       total = ((int64_t)*secs * (int64_t)NSEC_PER_SEC) + (*microsecs * (int64_t)NSEC_PER_USEC);
+
+       /* 
+        * Disable commpage gettimeofday().
+        */
+       commpage_disable_timestamp();
+
+       /* 
+        * Get current absolute time.
+        */
+       now = mach_absolute_time();
+
+       /* 
+        * Save the old adjustment total for later return.
+        */
+       ototal = calend_adjtotal;
 
        /*
-        * If the inserted alarm is the 'earliest' alarm,
-        * reset the device layer alarm time accordingly.
+        * Is a new correction specified?
         */
-       if (clock->cl_alarm.al_next == alarm)
-               (*clock->cl_ops->c_setalrm)(alarm_time);
-}
+       if (total != 0) {
+               /*
+                * Set delta to the standard, small, adjustment skew.
+                */
+               int32_t         delta = calend_adjskew;
 
-/*
- * Check the validity of 'alarm_time' and 'alarm_type'. If either
- * argument is invalid, return a negative value. If the 'alarm_time'
- * is now, return a 0 value. If the 'alarm_time' is in the future,
- * return a positive value.
- */
-static
-int
-check_time(
-       alarm_type_t            alarm_type,
-       mach_timespec_t         *alarm_time,
-       mach_timespec_t         *clock_time)
-{
-       int                                     result;
+               if (total > 0) {
+                       /*
+                        * Positive adjustment. If greater than the preset 'big' 
+                        * threshold, slew at a faster rate, capping if necessary.
+                        */
+                       if (total > (int64_t) calend_adjbig)
+                               delta *= 10;
+                       if (delta > total)
+                               delta = (int32_t)total;
 
-       if (BAD_ALRMTYPE(alarm_type))
-               return (-1);
-       if (BAD_MACH_TIMESPEC(alarm_time))
-               return (-1);
-       if ((alarm_type & ALRMTYPE) == TIME_RELATIVE)
-               ADD_MACH_TIMESPEC(alarm_time, clock_time);
+                       /* 
+                        * Convert the delta back from ns to absolute time and store in adjoffset.
+                        */
+                       nanoseconds_to_absolutetime((uint64_t)delta, &t64);
+                       clock_calend.adjoffset = (uint32_t)t64;
+               }
+               else {
+                       /*
+                        * Negative adjustment; therefore, negate the delta. If 
+                        * greater than the preset 'big' threshold, slew at a faster 
+                        * rate, capping if necessary.
+                        */
+                       if (total < (int64_t) -calend_adjbig)
+                               delta *= 10;
+                       delta = -delta;
+                       if (delta < total)
+                               delta = (int32_t)total;
+
+                       /* 
+                        * Save the current absolute time. Subsequent time operations occurring
+                        * during this negative correction can make use of this value to ensure 
+                        * that time increases monotonically.
+                        */
+                       clock_calend.adjstart = now;
 
-       result = CMP_MACH_TIMESPEC(alarm_time, clock_time);
+                       /* 
+                        * Convert the delta back from ns to absolute time and store in adjoffset.
+                        */
+                       nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
+                       clock_calend.adjoffset = (uint32_t)t64;
+               }
 
-       return ((result >= 0)? result: 0);
-}
+               /* 
+                * Store the total adjustment time in ns. 
+                */
+               calend_adjtotal = total;
+               
+               /* 
+                * Store the delta for this adjustment period in ns. 
+                */
+               clock_calend.adjdelta = delta;
 
-mach_timespec_t
-clock_get_system_value(void)
-{
-       clock_t                         clock = &clock_list[SYSTEM_CLOCK];
-       mach_timespec_t         value;
+               /* 
+                * Set the interval in absolute time for later return. 
+                */
+               interval = calend_adjinterval;
+       }
+       else {
+               /* 
+                * No change; clear any prior adjustment.
+                */
+               calend_adjtotal = clock_calend.adjdelta = 0;
+       }
 
-       (void) (*clock->cl_ops->c_gettime)(&value);
+       /* 
+        * If a prior correction was in progress, return the
+        * remaining uncorrected time from it. 
+        */
+       if (ototal != 0) {
+               *secs = (long)(ototal / (long)NSEC_PER_SEC);
+               *microsecs = (int)((ototal % (int)NSEC_PER_SEC) / (int)NSEC_PER_USEC);
+       }
+       else
+               *secs = *microsecs = 0;
 
-       return value;
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
+       
+       return (interval);
 }
 
-mach_timespec_t
-clock_get_calendar_value(void)
+static void
+calend_adjust_call(void)
 {
-       clock_t                         clock = &clock_list[CALENDAR_CLOCK];
-       mach_timespec_t         value = MACH_TIMESPEC_ZERO;
+       uint32_t        interval;
+       spl_t           s;
+
+       s = splclock();
+       clock_lock();
 
-       (void) (*clock->cl_ops->c_gettime)(&value);
+       if (--calend_adjactive == 0) {
+               interval = calend_adjust();
+               if (interval != 0) {
+                       clock_deadline_for_periodic_event(interval, mach_absolute_time(), &calend_adjdeadline);
 
-       return value;
+                       if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_SYS_CRITICAL))
+                               calend_adjactive++;
+               }
+       }
+
+       clock_unlock();
+       splx(s);
 }
 
-void
-clock_deadline_for_periodic_event(
-       uint64_t                        interval,
-       uint64_t                        abstime,
-       uint64_t                        *deadline)
+static uint32_t
+calend_adjust(void)
 {
-       assert(interval != 0);
+       uint64_t                now, t64;
+       int32_t                 delta;
+       uint32_t                interval = 0;
 
-       *deadline += interval;
+       commpage_disable_timestamp();
 
-       if (*deadline <= abstime) {
-               *deadline = abstime + interval;
-               abstime = mach_absolute_time();
+       now = mach_absolute_time();
 
-               if (*deadline <= abstime)
-                       *deadline = abstime + interval;
-       }
-}
+       delta = clock_calend.adjdelta;
 
-void
-mk_timebase_info_trap(
-       struct mk_timebase_info_trap_args *args)
-{
-       uint32_t                                        *delta = args->delta;
-       uint32_t                                        *abs_to_ns_numer = args->abs_to_ns_numer;
-       uint32_t                                        *abs_to_ns_denom = args->abs_to_ns_denom;
-       uint32_t                                        *proc_to_abs_numer = args->proc_to_abs_numer;
-       uint32_t                                        *proc_to_abs_denom = args->proc_to_abs_denom;
-       mach_timebase_info_data_t       info;
-       uint32_t                                        one = 1;
+       if (delta > 0) {
+               clock_calend.offset += clock_calend.adjoffset;
 
-       clock_timebase_info(&info);
+               calend_adjtotal -= delta;
+               if (delta > calend_adjtotal) {
+                       clock_calend.adjdelta = delta = (int32_t)calend_adjtotal;
 
-       copyout((void *)&one, CAST_USER_ADDR_T(delta), sizeof (uint32_t));
+                       nanoseconds_to_absolutetime((uint64_t)delta, &t64);
+                       clock_calend.adjoffset = (uint32_t)t64;
+               }
+       }
+       else
+               if (delta < 0) {
+                       clock_calend.offset -= clock_calend.adjoffset;
 
-       copyout((void *)&info.numer, CAST_USER_ADDR_T(abs_to_ns_numer), sizeof (uint32_t));
-       copyout((void *)&info.denom, CAST_USER_ADDR_T(abs_to_ns_denom), sizeof (uint32_t));
+                       calend_adjtotal -= delta;
+                       if (delta < calend_adjtotal) {
+                               clock_calend.adjdelta = delta = (int32_t)calend_adjtotal;
 
-       copyout((void *)&one, CAST_USER_ADDR_T(proc_to_abs_numer), sizeof (uint32_t));
-       copyout((void *)&one, CAST_USER_ADDR_T(proc_to_abs_denom), sizeof (uint32_t));
-}
+                               nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
+                               clock_calend.adjoffset = (uint32_t)t64;
+                       }
 
-kern_return_t
-mach_timebase_info_trap(
-       struct mach_timebase_info_trap_args *args)
-{
-       mach_vm_address_t                       out_info_addr = args->info;
-       mach_timebase_info_data_t       info;
+                       if (clock_calend.adjdelta != 0)
+                               clock_calend.adjstart = now;
+               }
 
-       clock_timebase_info(&info);
+       if (clock_calend.adjdelta != 0)
+               interval = calend_adjinterval;
 
-       copyout((void *)&info, out_info_addr, sizeof (info));
+#if CONFIG_DTRACE
+       clock_track_calend_nowait();
+#endif
 
-       return (KERN_SUCCESS);
+       return (interval);
 }
 
+/*
+ *     Wait / delay routines.
+ */
 static void
 mach_wait_until_continue(
        __unused void   *parameter,
@@ -881,6 +890,15 @@ mach_wait_until_continue(
        /*NOTREACHED*/
 }
 
+/*
+ * mach_wait_until_trap: Suspend execution of calling thread until the specified time has passed
+ *
+ * Parameters:    args->deadline          Absolute-time deadline to wait until
+ *
+ * Returns:        KERN_SUCCESS           Wait was not interrupted
+ *                KERN_ABORTED           Wait was interrupted
+ *
+ */
 kern_return_t
 mach_wait_until_trap(
        struct mach_wait_until_trap_args        *args)
@@ -888,16 +906,14 @@ mach_wait_until_trap(
        uint64_t                deadline = args->deadline;
        wait_result_t   wresult;
 
-       wresult = assert_wait_deadline((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, deadline);
+       wresult = assert_wait_deadline_with_leeway((event_t)mach_wait_until_trap, THREAD_ABORTSAFE,
+                                                  TIMEOUT_URGENCY_USER_NORMAL, deadline, 0);
        if (wresult == THREAD_WAITING)
                wresult = thread_block(mach_wait_until_continue);
 
        return ((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS);
 }
 
-/*
- * Delay primitives.
- */
 void
 clock_delay_until(
        uint64_t                deadline)
@@ -907,12 +923,50 @@ clock_delay_until(
        if (now >= deadline)
                return;
 
-       if (    (deadline - now) < (8 * sched_cswtime)  ||
+       _clock_delay_until_deadline(deadline - now, deadline);
+}
+
+/*
+ * Preserve the original precise interval that the client
+ * requested for comparison to the spin threshold.
+ */
+void
+_clock_delay_until_deadline(
+       uint64_t                interval,
+       uint64_t                deadline)
+{
+       _clock_delay_until_deadline_with_leeway(interval, deadline, 0);
+}
+
+/*
+ * Like _clock_delay_until_deadline, but it accepts a
+ * leeway value.
+ */
+void
+_clock_delay_until_deadline_with_leeway(
+       uint64_t                interval,
+       uint64_t                deadline,
+       uint64_t                leeway)
+{
+
+       if (interval == 0)
+               return;
+
+       if (    ml_delay_should_spin(interval)  ||
                        get_preemption_level() != 0                             ||
-                       ml_get_interrupts_enabled() == FALSE    )
-               machine_delay_until(deadline);
-       else {
-               assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline - sched_cswtime);
+                       ml_get_interrupts_enabled() == FALSE    ) {
+               machine_delay_until(interval, deadline);
+       } else {
+               /*
+                * For now, assume a leeway request of 0 means the client does not want a leeway
+                * value. We may want to change this interpretation in the future.
+                */
+
+               if (leeway) {
+                       assert_wait_deadline_with_leeway((event_t)clock_delay_until, THREAD_UNINT, TIMEOUT_URGENCY_LEEWAY, deadline, leeway);
+               } else {
+                       assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
+               }
 
                thread_block(THREAD_CONTINUE_NULL);
        }
@@ -923,11 +977,26 @@ delay_for_interval(
        uint32_t                interval,
        uint32_t                scale_factor)
 {
-       uint64_t                end;
+       uint64_t                abstime;
 
-       clock_interval_to_deadline(interval, scale_factor, &end);
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
-       clock_delay_until(end);
+       _clock_delay_until_deadline(abstime, mach_absolute_time() + abstime);
+}
+
+void
+delay_for_interval_with_leeway(
+       uint32_t                interval,
+       uint32_t                leeway,
+       uint32_t                scale_factor)
+{
+       uint64_t                abstime_interval;
+       uint64_t                abstime_leeway;
+
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime_interval);
+       clock_interval_to_absolutetime_interval(leeway, scale_factor, &abstime_leeway);
+
+       _clock_delay_until_deadline_with_leeway(abstime_interval, mach_absolute_time() + abstime_interval, abstime_leeway);
 }
 
 void
@@ -937,69 +1006,215 @@ delay(
        delay_for_interval((usec < 0)? -usec: usec, NSEC_PER_USEC);
 }
 
+/*
+ *     Miscellaneous routines.
+ */
 void
-clock_adjtime(
-       int32_t         *secs,
-       int32_t         *microsecs)
+clock_interval_to_deadline(
+       uint32_t                        interval,
+       uint32_t                        scale_factor,
+       uint64_t                        *result)
 {
-       uint32_t        interval;
-       spl_t           s;
+       uint64_t        abstime;
 
-       s = splclock();
-       simple_lock(&calend_adjlock);
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
-       interval = clock_set_calendar_adjtime(secs, microsecs);
-       if (interval != 0) {
-               if (calend_adjdeadline >= interval)
-                       calend_adjdeadline -= interval;
-               clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-                                                                                               &calend_adjdeadline);
+       *result = mach_absolute_time() + abstime;
+}
 
-               timer_call_enter(&calend_adjcall, calend_adjdeadline);
-       }
-       else
-               timer_call_cancel(&calend_adjcall);
+void
+clock_absolutetime_interval_to_deadline(
+       uint64_t                        abstime,
+       uint64_t                        *result)
+{
+       *result = mach_absolute_time() + abstime;
+}
 
-       simple_unlock(&calend_adjlock);
-       splx(s);
+void
+clock_continuoustime_interval_to_deadline(
+       uint64_t                        conttime,
+       uint64_t                        *result)
+{
+       *result = mach_continuous_time() + conttime;
 }
 
-static void
-calend_adjust_call(
-       __unused timer_call_param_t             p0,
-       __unused timer_call_param_t             p1)
+void
+clock_get_uptime(
+       uint64_t        *result)
 {
-       uint32_t        interval;
-       spl_t           s;
+       *result = mach_absolute_time();
+}
 
-       s = splclock();
-       simple_lock(&calend_adjlock);
+void
+clock_deadline_for_periodic_event(
+       uint64_t                        interval,
+       uint64_t                        abstime,
+       uint64_t                        *deadline)
+{
+       assert(interval != 0);
 
-       interval = clock_adjust_calendar();
-       if (interval != 0) {
-               clock_deadline_for_periodic_event(interval, mach_absolute_time(),
-                                                                                               &calend_adjdeadline);
+       *deadline += interval;
 
-               timer_call_enter(&calend_adjcall, calend_adjdeadline);
+       if (*deadline <= abstime) {
+               *deadline = abstime + interval;
+               abstime = mach_absolute_time();
+
+               if (*deadline <= abstime)
+                       *deadline = abstime + interval;
        }
+}
 
-       simple_unlock(&calend_adjlock);
-       splx(s);
+uint64_t
+mach_continuous_time(void)
+{
+       while(1) {      
+               uint64_t read1 = mach_absolutetime_asleep;
+               uint64_t absolute = mach_absolute_time();
+               OSMemoryBarrier();
+               uint64_t read2 = mach_absolutetime_asleep;
+
+               if(__builtin_expect(read1 == read2, 1)) {
+                       return absolute + read1;
+               }
+       }
 }
 
-void
-clock_wakeup_calendar(void)
+uint64_t
+mach_continuous_approximate_time(void)
 {
-       thread_call_enter(&calend_wakecall);
+       while(1) {
+               uint64_t read1 = mach_absolutetime_asleep;
+               uint64_t absolute = mach_approximate_time();
+               OSMemoryBarrier();
+               uint64_t read2 = mach_absolutetime_asleep;
+
+               if(__builtin_expect(read1 == read2, 1)) {
+                       return absolute + read1;
+               }
+       }
 }
 
-extern void            IOKitResetTime(void); /* XXX */
+/*
+ * continuoustime_to_absolutetime
+ * Must be called with interrupts disabled
+ * Returned value is only valid until the next update to
+ * mach_continuous_time 
+ */
+uint64_t
+continuoustime_to_absolutetime(uint64_t conttime) {
+       if (conttime <= mach_absolutetime_asleep)
+               return 0;
+       else
+               return conttime - mach_absolutetime_asleep;
+}
 
-static void
-calend_dowakeup(
-       __unused thread_call_param_t            p0,
-       __unused thread_call_param_t            p1)
+/*
+ * absolutetime_to_continuoustime
+ * Must be called with interrupts disabled
+ * Returned value is only valid until the next update to
+ * mach_continuous_time 
+ */
+uint64_t
+absolutetime_to_continuoustime(uint64_t abstime) {
+       return abstime + mach_absolutetime_asleep;
+}
+
+#if    CONFIG_DTRACE
+
+/*
+ * clock_get_calendar_nanotime_nowait
+ *
+ * Description:        Non-blocking version of clock_get_calendar_nanotime()
+ *
+ * Notes:      This function operates by separately tracking calendar time
+ *             updates using a two element structure to copy the calendar
+ *             state, which may be asynchronously modified.  It utilizes
+ *             barrier instructions in the tracking process and in the local
+ *             stable snapshot process in order to ensure that a consistent
+ *             snapshot is used to perform the calculation.
+ */
+void
+clock_get_calendar_nanotime_nowait(
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
+{
+       int i = 0;
+       uint64_t                now;
+       struct unlocked_clock_calend stable;
+
+       for (;;) {
+               stable = flipflop[i];           /* take snapshot */
+
+               /*
+                * Use a barrier instruction to ensure atomicity.  We AND
+                * off the "in progress" bit to get the current generation
+                * count.
+                */
+               (void)hw_atomic_and(&stable.gen, ~(uint32_t)1);
+
+               /*
+                * If an update _is_ in progress, the generation count will be
+                * off by one, if it _was_ in progress, it will be off by two,
+                * and if we caught it at a good time, it will be equal (and
+                * our snapshot is therefore stable).
+                */
+               if (flipflop[i].gen == stable.gen)
+                       break;
+
+               /* Switch to the other element of the flipflop, and try again. */
+               i ^= 1;
+       }
+
+       now = mach_absolute_time();
+
+       if (stable.calend.adjdelta < 0) {
+               uint32_t        t32;
+
+               if (now > stable.calend.adjstart) {
+                       t32 = (uint32_t)(now - stable.calend.adjstart);
+
+                       if (t32 > stable.calend.adjoffset)
+                               now -= stable.calend.adjoffset;
+                       else
+                               now = stable.calend.adjstart;
+               }
+       }
+
+       now += stable.calend.offset;
+
+       absolutetime_to_microtime(now, secs, nanosecs);
+       *nanosecs *= NSEC_PER_USEC;
+
+       *secs += (clock_sec_t)stable.calend.epoch;
+}
+
+static void 
+clock_track_calend_nowait(void)
 {
+       int i;
+
+       for (i = 0; i < 2; i++) {
+               struct clock_calend tmp = clock_calend;
+
+               /*
+                * Set the low bit of the generation count; since we use a
+                * barrier instruction to do this, we are guaranteed that this
+                * will flag an update in progress to an async caller trying
+                * to examine the contents.
+                */
+               (void)hw_atomic_or(&flipflop[i].gen, 1);
+
+               flipflop[i].calend = tmp;
 
-       IOKitResetTime();
+               /*
+                * Increment the generation count to clear the low bit to
+                * signal completion.  If a caller compares the generation
+                * count after taking a copy while in progress, the count
+                * will be off by two.
+                */
+               (void)hw_atomic_add(&flipflop[i].gen, 1);
+       }
 }
+
+#endif /* CONFIG_DTRACE */
+