]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/rtclock.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / osfmk / i386 / rtclock.c
index 43ab1f3be79d6f54858c4cc65e902132a903415e..6ed44cc7357b52b2e899081369c1caeeb7280390 100644 (file)
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
 /*
  *     File:           i386/rtclock.c
  *     Purpose:        Routines for handling the machine dependent
- *                     real-time clock. This clock is generated by
- *                     the Intel 8254 Programmable Interval Timer.
+ *                     real-time clock. Historically, this clock was
+ *                     generated by the Intel 8254 Programmable Interval
+ *                     Timer, but local apic timers are now used for
+ *                     this purpose with the master time reference being
+ *                     the cpu clock counted by the timestamp MSR.
  */
 
-#include <cpus.h>
-#include <platforms.h>
-#include <mp_v1_1.h>
-#include <mach_kdb.h>
-#include <kern/cpu_number.h>
+
+#include <mach/mach_types.h>
+
 #include <kern/cpu_data.h>
+#include <kern/cpu_number.h>
 #include <kern/clock.h>
+#include <kern/host_notify.h>
 #include <kern/macro_help.h>
 #include <kern/misc_protos.h>
 #include <kern/spl.h>
-#include <machine/mach_param.h>        /* HZ */
+#include <kern/assert.h>
+#include <kern/timer_queue.h>
 #include <mach/vm_prot.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>                /* for kernel_map */
-#include <i386/ipl.h>
-#include <i386/pit.h>
-#include <i386/pio.h>
+#include <architecture/i386/pio.h>
+#include <i386/machine_cpu.h>
+#include <i386/cpuid.h>
+#include <i386/cpu_threads.h>
+#include <i386/mp.h>
+#include <i386/machine_routines.h>
+#include <i386/pal_routines.h>
+#include <i386/proc_reg.h>
 #include <i386/misc_protos.h>
-#include <i386/rtclock_entries.h>
-#include <i386/hardclock_entries.h>
-
-int            sysclk_config(void);
+#include <pexpert/pexpert.h>
+#include <machine/limits.h>
+#include <machine/commpage.h>
+#include <sys/kdebug.h>
+#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
+#define UI_CPUFREQ_ROUNDING_FACTOR     10000000
 
-int            sysclk_init(void);
+int            rtclock_init(void);
 
-kern_return_t  sysclk_gettime(
-       mach_timespec_t                 *cur_time);
+uint64_t       tsc_rebase_abs_time = 0;
 
-kern_return_t  sysclk_getattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  *count);
+static void    rtc_set_timescale(uint64_t cycles);
+static uint64_t        rtc_export_speed(uint64_t cycles);
 
-kern_return_t  sysclk_setattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  count);
-
-void           sysclk_setalarm(
-       mach_timespec_t                 *alarm_time);
-
-extern void (*IOKitRegisterInterruptHook)(void *,  int irq, int isclock);
-
-/*
- * Inlines to get timestamp counter value.
- */
-
-static inline void rdtsc_hilo(uint32_t *hi, uint32_t *lo) {
-        asm volatile("rdtsc": "=a" (*lo), "=d" (*hi));
+void
+rtc_timer_start(void)
+{
+       /*
+        * Force a complete re-evaluation of timer deadlines.
+        */
+       x86_lcpu()->rtcDeadline = EndOfAllTime;
+       timer_resync_deadlines();
 }
 
-static inline uint64_t rdtsc_64(void) {
-       uint64_t result;
-        asm volatile("rdtsc": "=A" (result));
-       return result;
+static inline uint32_t
+_absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs)
+{
+       uint32_t remain;
+       *secs = abstime / (uint64_t)NSEC_PER_SEC;
+       remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC);
+       *microsecs = remain / NSEC_PER_USEC;
+       return remain;
 }
 
-/*
- * Lists of clock routines.
- */
-struct clock_ops  sysclk_ops = {
-       sysclk_config,                  sysclk_init,
-       sysclk_gettime,                 0,
-       sysclk_getattr,                 sysclk_setattr,
-       sysclk_setalarm,
-};
-
-int            calend_config(void);
-
-int            calend_init(void);
-
-kern_return_t  calend_gettime(
-       mach_timespec_t                 *cur_time);
-
-kern_return_t  calend_settime(
-       mach_timespec_t                 *cur_time);
-
-kern_return_t  calend_getattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  *count);
-
-struct clock_ops calend_ops = {
-       calend_config,                  calend_init,
-       calend_gettime,                 calend_settime,
-       calend_getattr,                 0,
-       0,
-};
-
-/* local data declarations */
-mach_timespec_t                *RtcTime = (mach_timespec_t *)0;
-mach_timespec_t                *RtcAlrm;
-clock_res_t                    RtcDelt;
-
-/* global data declarations */
-struct {
-       uint64_t                        abstime;
-
-       mach_timespec_t         time;
-       mach_timespec_t         alarm_time;     /* time of next alarm */
-
-       mach_timespec_t         calend_offset;
-       boolean_t                       calend_is_set;
-
-       uint64_t                        timer_deadline;
-       boolean_t                       timer_is_set;
-       clock_timer_func_t      timer_expire;
-
-       clock_res_t                     new_ires;       /* pending new resolution (nano ) */
-       clock_res_t                     intr_nsec;      /* interrupt resolution (nano) */
-
-       decl_simple_lock_data(,lock)    /* real-time clock device lock */
-} rtclock;
-
-unsigned int           clknum;                        /* clks per second */
-unsigned int           new_clknum;                    /* pending clknum */
-unsigned int           time_per_clk;                  /* time per clk in ZHZ */
-unsigned int           clks_per_int;                  /* clks per interrupt */
-unsigned int           clks_per_int_99;
-int                                    rtc_intr_count;                /* interrupt counter */
-int                                    rtc_intr_hertz;                /* interrupts per HZ */
-int                                    rtc_intr_freq;                 /* interrupt frequency */
-int                                    rtc_print_lost_tick;           /* print lost tick */
-
-uint32_t               rtc_cyc_per_sec;                /* processor cycles per seconds */
-uint32_t               rtc_last_int_tsc_lo;            /* tsc values saved per interupt */
-uint32_t               rtc_last_int_tsc_hi;
-
-/*
- *     Macros to lock/unlock real-time clock device.
- */
-#define LOCK_RTC(s)                                    \
-MACRO_BEGIN                                                    \
-       (s) = splclock();                               \
-       simple_lock(&rtclock.lock);             \
-MACRO_END
-
-#define UNLOCK_RTC(s)                          \
-MACRO_BEGIN                                                    \
-       simple_unlock(&rtclock.lock);   \
-       splx(s);                                                \
-MACRO_END
+static inline void
+_absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs)
+{
+       *secs = abstime / (uint64_t)NSEC_PER_SEC;
+       *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC);
+}
 
 /*
- * i8254 control.  ** MONUMENT **
+ * Nanotime/mach_absolute_time
+ * ---------------------------
+ * The timestamp counter (TSC) - which counts cpu clock cycles and can be read
+ * efficiently by the kernel and in userspace - is the reference for all timing.
+ * The cpu clock rate is platform-dependent and may stop or be reset when the
+ * processor is napped/slept.  As a result, nanotime is the software abstraction
+ * used to maintain a monotonic clock, adjusted from an outside reference as needed.
  *
- * The i8254 is a traditional PC device with some arbitrary characteristics.
- * Basically, it is a register that counts at a fixed rate and can be
- * programmed to generate an interrupt every N counts.  The count rate is
- * clknum counts per second (see pit.h), historically 1193167 we believe.
- * Various constants are computed based on this value, and we calculate
- * them at init time for execution efficiency.  To obtain sufficient
- * accuracy, some of the calculation are most easily done in floating
- * point and then converted to int.
+ * The kernel maintains nanotime information recording:
+ *     - the ratio of tsc to nanoseconds
+ *       with this ratio expressed as a 32-bit scale and shift
+ *       (power of 2 divider);
+ *     - { tsc_base, ns_base } pair of corresponding timestamps.
  *
- * We want an interrupt every 10 milliseconds, approximately.  The count
- * which will do that is clks_per_int.  However, that many counts is not
- * *exactly* 10 milliseconds; it is a bit more or less depending on
- * roundoff.  The actual time per tick is calculated and saved in
- * rtclock.intr_nsec, and it is that value which is added to the time
- * register on each tick.
+ * The tuple {tsc_base, ns_base, scale, shift} is exported in the commpage 
+ * for the userspace nanotime routine to read.
  *
- * The i8254 counter can be read between interrupts in order to determine
- * the time more accurately.  The counter counts down from the preset value
- * toward 0, and we have to handle the case where the counter has been
- * reset just before being read and before the interrupt has been serviced.
- * Given a count since the last interrupt, the time since then is given
- * by (count * time_per_clk).  In order to minimize integer truncation,
- * we perform this calculation in an arbitrary unit of time which maintains
- * the maximum precision, i.e. such that one tick is 1.0e9 of these units,
- * or close to the precision of a 32-bit int.  We then divide by this unit
- * (which doesn't lose precision) to get nanoseconds.  For notation
- * purposes, this unit is defined as ZHZ = zanoseconds per nanosecond.
- *
- * This sequence to do all this is in sysclk_gettime.  For efficiency, this
- * sequence also needs the value that the counter will have if it has just
- * overflowed, so we precompute that also.  ALSO, certain platforms
- * (specifically the DEC XL5100) have been observed to have problem
- * with latching the counter, and they occasionally (say, one out of
- * 100,000 times) return a bogus value.  Hence, the present code reads
- * the counter twice and checks for a consistent pair of values.
- *
- * Some attributes of the rt clock can be changed, including the
- * interrupt resolution.  We default to the minimum resolution (10 ms),
- * but allow a finer resolution to be requested.  The assumed frequency
- * of the clock can also be set since it appears that the actual
- * frequency of real-world hardware can vary from the nominal by
- * 200 ppm or more.  When the frequency is set, the values above are
- * recomputed and we continue without resetting or changing anything else.
+ * None of the routines which update the nanotime data is reentrant.  The caller
+ * must guarantee that they are not re-entered.
  */
-#define RTC_MINRES     (NSEC_PER_SEC / HZ)     /* nsec per tick */
-#define        RTC_MAXRES      (RTC_MINRES / 20)       /* nsec per tick */
-#define        ZANO            (1000000000)
-#define ZHZ             (ZANO / (NSEC_PER_SEC / HZ))
-#define READ_8254(val) { \
-        outb(PITCTL_PORT, PIT_C0);             \
-       (val) = inb(PITCTR0_PORT);               \
-       (val) |= inb(PITCTR0_PORT) << 8 ; }
-
-/*
- * Calibration delay counts.
- */
-unsigned int   delaycount = 100;
-unsigned int   microdata = 50;
+static inline void
+rtc_nanotime_set_commpage(pal_rtc_nanotime_t *rntp)
+{
+       commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift);
+}
 
 /*
- * Forward decl.
+ * rtc_nanotime_init:
+ *
+ * Initialize the nanotime info from the base time.
  */
+static inline void
+_rtc_nanotime_init(pal_rtc_nanotime_t *rntp, uint64_t base)
+{
+       uint64_t        tsc = rdtsc64();
 
-extern int   measure_delay(int us);
-void         rtc_setvals( unsigned int, clock_res_t );
+       _pal_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp);
+}
 
-static void  rtc_set_cyc_per_sec();
+void
+rtc_nanotime_init(uint64_t base)
+{
+       _rtc_nanotime_init(&pal_rtc_nanotime_info, base);
+       rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
+}
 
 /*
- * Initialize non-zero clock structure values.
+ * rtc_nanotime_init_commpage:
+ *
+ * Call back from the commpage initialization to
+ * cause the commpage data to be filled in once the
+ * commpages have been created.
  */
 void
-rtc_setvals(
-       unsigned int new_clknum,
-       clock_res_t  new_ires
-       )
+rtc_nanotime_init_commpage(void)
 {
-    unsigned int timeperclk;
-    unsigned int scale0;
-    unsigned int scale1;
-    unsigned int res;
-
-    clknum = new_clknum;
-    rtc_intr_freq = (NSEC_PER_SEC / new_ires);
-    rtc_intr_hertz = rtc_intr_freq / HZ;
-    clks_per_int = (clknum + (rtc_intr_freq / 2)) / rtc_intr_freq;
-    clks_per_int_99 = clks_per_int - clks_per_int/100;
-
-    /*
-     * The following calculations are done with scaling integer operations
-     * in order that the integer results are accurate to the lsb.
-     */
-    timeperclk = div_scale(ZANO, clknum, &scale0);     /* 838.105647 nsec */
-
-    time_per_clk = mul_scale(ZHZ, timeperclk, &scale1);        /* 83810 */
-    if (scale0 > scale1)
-       time_per_clk >>= (scale0 - scale1);
-    else if (scale0 < scale1)
-       panic("rtc_clock: time_per_clk overflow\n");
-
-    /*
-     * Notice that rtclock.intr_nsec is signed ==> use unsigned int res
-     */
-    res = mul_scale(clks_per_int, timeperclk, &scale1);        /* 10000276 */
-    if (scale0 > scale1)
-       rtclock.intr_nsec = res >> (scale0 - scale1);
-    else
-       panic("rtc_clock: rtclock.intr_nsec overflow\n");
-
-    rtc_intr_count = 1;
-    RtcDelt = rtclock.intr_nsec/2;
+       spl_t                   s = splclock();
+
+       rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
+       splx(s);
 }
 
 /*
- * Configure the real-time clock device. Return success (1)
- * or failure (0).
+ * rtc_nanotime_read:
+ *
+ * Returns the current nanotime value, accessible from any
+ * context.
  */
-
-int
-sysclk_config(void)
+static inline uint64_t
+rtc_nanotime_read(void)
 {
-       int     RtcFlag;
-       int     pic;
-
-#if    NCPUS > 1
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return(1);
-       }
-       mp_enable_preemption();
-#endif
-       /*
-        * Setup device.
-        */
-#if    MP_V1_1
-    {
-       extern boolean_t mp_v1_1_initialized;
-       if (mp_v1_1_initialized)
-           pic = 2;
-       else
-           pic = 0;
-    }
-#else
-       pic = 0;        /* FIXME .. interrupt registration moved to AppleIntelClock */
-#endif
-
-
-       /*
-        * We should attempt to test the real-time clock
-        * device here. If it were to fail, we should panic
-        * the system.
-        */
-       RtcFlag = /* test device */1;
-       printf("realtime clock configured\n");
-
-       simple_lock_init(&rtclock.lock, ETAP_NO_TRACE);
-       return (RtcFlag);
+       return  _rtc_nanotime_read(&pal_rtc_nanotime_info);
 }
 
 /*
- * Initialize the real-time clock device. Return success (1)
- * or failure (0). Since the real-time clock is required to
- * provide canonical mapped time, we allocate a page to keep
- * the clock time value. In addition, various variables used
- * to support the clock are initialized.  Note: the clock is
- * not started until rtclock_reset is called.
+ * rtc_clock_napped:
+ *
+ * Invoked from power management when we exit from a low C-State (>= C4)
+ * and the TSC has stopped counting.  The nanotime data is updated according
+ * to the provided value which represents the new value for nanotime.
  */
-int
-sysclk_init(void)
-{
-       vm_offset_t     *vp;
-#if    NCPUS > 1
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return(1);
+void
+rtc_clock_napped(uint64_t base, uint64_t tsc_base)
+{
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint64_t        oldnsecs;
+       uint64_t        newnsecs;
+       uint64_t        tsc;
+
+       assert(!ml_get_interrupts_enabled());
+       tsc = rdtsc64();
+       oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp);
+       newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp);
+       
+       /*
+        * Only update the base values if time using the new base values
+        * is later than the time using the old base values.
+        */
+       if (oldnsecs < newnsecs) {
+           _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+           rtc_nanotime_set_commpage(rntp);
        }
-       mp_enable_preemption();
-#endif
-
-       RtcTime = &rtclock.time;
-       rtc_setvals( CLKNUM, RTC_MINRES );  /* compute constants */
-       rtc_set_cyc_per_sec();  /* compute number of tsc beats per second */
-       return (1);
 }
 
-static volatile unsigned int     last_ival = 0;
-
 /*
- * Get the clock device time. This routine is responsible
- * for converting the device's machine dependent time value
- * into a canonical mach_timespec_t value.
+ * Invoked from power management to correct the SFLM TSC entry drift problem:
+ * a small delta is added to the tsc_base.  This is equivalent to nudging time
+ * backwards.  We require this to be on the order of a TSC quantum which won't
+ * cause callers of mach_absolute_time() to see time going backwards!
  */
-kern_return_t
-sysclk_gettime(
-       mach_timespec_t *cur_time)      /* OUT */
+void
+rtc_clock_adjust(uint64_t tsc_base_delta)
 {
-        mach_timespec_t        itime = {0, 0};
-       unsigned int    val, val2;
-       int             s;
-
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return (KERN_SUCCESS);
-       }
+    pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info;
 
-       /*
-        * Inhibit interrupts. Determine the incremental
-        * time since the last interrupt. (This could be
-        * done in assembler for a bit more speed).
-        */
-       LOCK_RTC(s);
-       do {
-           READ_8254(val);                 /* read clock */
-           READ_8254(val2);                /* read clock */
-       } while ( val2 > val || val2 < val - 10 );
-       if ( val > clks_per_int_99 ) {
-           outb( 0x0a, 0x20 );             /* see if interrupt pending */
-           if ( inb( 0x20 ) & 1 )
-               itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */
-       }
-       itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ;
-       if ( itime.tv_nsec < last_ival ) {
-           if (rtc_print_lost_tick)
-               printf( "rtclock: missed clock interrupt.\n" );
-       }
-       last_ival = itime.tv_nsec;
-       cur_time->tv_sec = rtclock.time.tv_sec;
-       cur_time->tv_nsec = rtclock.time.tv_nsec;
-       UNLOCK_RTC(s);
-       ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime));
-       return (KERN_SUCCESS);
+    assert(!ml_get_interrupts_enabled());
+    assert(tsc_base_delta < 100ULL);   /* i.e. it's small */
+    _rtc_nanotime_adjust(tsc_base_delta, rntp);
+    rtc_nanotime_set_commpage(rntp);
 }
 
-kern_return_t
-sysclk_gettime_internal(
-       mach_timespec_t *cur_time)      /* OUT */
+void
+rtc_clock_stepping(__unused uint32_t new_frequency,
+                  __unused uint32_t old_frequency)
 {
-        mach_timespec_t        itime = {0, 0};
-       unsigned int    val, val2;
-
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return (KERN_SUCCESS);
-       }
+       panic("rtc_clock_stepping unsupported");
+}
 
-       /*
-        * Inhibit interrupts. Determine the incremental
-        * time since the last interrupt. (This could be
-        * done in assembler for a bit more speed).
-        */
-       do {
-           READ_8254(val);                 /* read clock */
-           READ_8254(val2);                /* read clock */
-       } while ( val2 > val || val2 < val - 10 );
-       if ( val > clks_per_int_99 ) {
-           outb( 0x0a, 0x20 );             /* see if interrupt pending */
-           if ( inb( 0x20 ) & 1 )
-               itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */
-       }
-       itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ;
-       if ( itime.tv_nsec < last_ival ) {
-           if (rtc_print_lost_tick)
-               printf( "rtclock: missed clock interrupt.\n" );
-       }
-       last_ival = itime.tv_nsec;
-       cur_time->tv_sec = rtclock.time.tv_sec;
-       cur_time->tv_nsec = rtclock.time.tv_nsec;
-       ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime));
-       return (KERN_SUCCESS);
+void
+rtc_clock_stepped(__unused uint32_t new_frequency,
+                 __unused uint32_t old_frequency)
+{
+       panic("rtc_clock_stepped unsupported");
 }
 
 /*
- * Get the clock device time when ALL interrupts are already disabled.
- * Same as above except for turning interrupts off and on.
- * This routine is responsible for converting the device's machine dependent
- * time value into a canonical mach_timespec_t value.
+ * rtc_sleep_wakeup:
+ *
+ * Invoked from power management when we have awoken from a sleep (S3)
+ * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC
+ * has progressed.  The nanotime data is updated based on the passed-in value.
+ *
+ * The caller must guarantee non-reentrancy.
  */
 void
-sysclk_gettime_interrupts_disabled(
-       mach_timespec_t *cur_time)      /* OUT */
+rtc_sleep_wakeup(
+       uint64_t                base)
 {
-       mach_timespec_t itime = {0, 0};
-       unsigned int    val;
-
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return;
-       }
-
-       simple_lock(&rtclock.lock);
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->rtc_config();
 
        /*
-        * Copy the current time knowing that we cant be interrupted
-        * between the two longwords and so dont need to use MTS_TO_TS
+        * Reset nanotime.
+        * The timestamp counter will have been reset
+        * but nanotime (uptime) marches onward.
         */
-       READ_8254(val);                     /* read clock */
-       if ( val > clks_per_int_99 ) {
-           outb( 0x0a, 0x20 );             /* see if interrupt pending */
-           if ( inb( 0x20 ) & 1 )
-               itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */
-       }
-       itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ;
-       if ( itime.tv_nsec < last_ival ) {
-           if (rtc_print_lost_tick)
-               printf( "rtclock: missed clock interrupt.\n" );
-       }
-       last_ival = itime.tv_nsec;
-       cur_time->tv_sec = rtclock.time.tv_sec;
-       cur_time->tv_nsec = rtclock.time.tv_nsec;
-       ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime));
-
-       simple_unlock(&rtclock.lock);
+       rtc_nanotime_init(base);
 }
 
-// utility routine 
-// Code to calculate how many processor cycles are in a second...
-
-static void
-rtc_set_cyc_per_sec() 
-{
-
-        int     x, y;
-        uint64_t cycles;
-        uint32_t   c[15];          // array for holding sampled cycle counts
-        mach_timespec_t tst[15];  // array for holding time values. NOTE for some reason tv_sec not work
-
-        for (x=0; x<15; x++) {  // quick sample 15 times
-                tst[x].tv_sec = 0;
-                tst[x].tv_nsec = 0;
-                sysclk_gettime_internal(&tst[x]);
-               rdtsc_hilo(&y, &c[x]);
-        }
-        y = 0;
-        cycles = 0;
-        for (x=0; x<14; x++) {
-          // simple formula really. calculate the numerator as the number of elapsed processor
-          // cycles * 1000 to adjust for the resolution we want. The denominator is the
-          // elapsed "real" time in nano-seconds. The result will be the processor speed in  
-          // Mhz. any overflows will be discarded before they are added
-          if ((c[x+1] > c[x]) && (tst[x+1].tv_nsec > tst[x].tv_nsec)) {
-                cycles += ((uint64_t)(c[x+1]-c[x]) * NSEC_PER_SEC ) / (uint64_t)(tst[x+1].tv_nsec - tst[x].tv_nsec);       // elapsed nsecs
-                y +=1;
-          }
-        }
-        if (y>0) { // we got more than 1 valid sample. This also takes care of the case of if the clock isn't running
-          cycles = cycles / y;    // calc our average
-        }
-       rtc_cyc_per_sec = cycles;
-       rdtsc_hilo(&rtc_last_int_tsc_hi, &rtc_last_int_tsc_lo);
-}
-
-static
-natural_t
-get_uptime_cycles(void)
-{
-        // get the time since the last interupt based on the processors TSC ignoring the
-        // RTC for speed
-        uint32_t   a,d,intermediate_lo,intermediate_hi,result;
-        uint64_t   newTime;
-        
-       rdtsc_hilo(&d, &a);
-        if (d != rtc_last_int_tsc_hi) {
-         newTime = d-rtc_last_int_tsc_hi;
-          newTime = (newTime<<32) + (a-rtc_last_int_tsc_lo);
-          result = newTime;
-        } else {
-          result = a-rtc_last_int_tsc_lo;
-        }
-        __asm__ volatile ( " mul %3 ": "=eax" (intermediate_lo), "=edx" (intermediate_hi): "a"(result), "d"(NSEC_PER_SEC) );
-        __asm__ volatile ( " div %3": "=eax" (result): "eax"(intermediate_lo), "edx" (intermediate_hi), "ecx" (rtc_cyc_per_sec) );
-        return result;
+void
+rtc_decrementer_configure(void) {
+       rtc_timer->rtc_config();
 }
-
-
 /*
- * Get clock device attributes.
+ * rtclock_early_init() is called very early at boot to
+ * establish mach_absolute_time() and set it to zero.
  */
-kern_return_t
-sysclk_getattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
+void
+rtclock_early_init(void)
 {
-       spl_t   s;
-
-       if (*count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:        /* >0 res */
-#if    (NCPUS == 1 || (MP_V1_1 && 0))
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = 1000;
-               UNLOCK_RTC(s);
-               break;
-#endif /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = rtclock.intr_nsec;
-               UNLOCK_RTC(s);
-               break;
-
-       case CLOCK_ALARM_MAXRES:
-               *(clock_res_t *) attr = RTC_MAXRES;
-               break;
-
-       case CLOCK_ALARM_MINRES:
-               *(clock_res_t *) attr = RTC_MINRES;
-               break;
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-       return (KERN_SUCCESS);
+       assert(tscFreq);
+       rtc_set_timescale(tscFreq);
 }
 
 /*
- * Set clock device attributes.
+ * Initialize the real-time clock device.
+ * In addition, various variables used to support the clock are initialized.
  */
-kern_return_t
-sysclk_setattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* IN */
-       mach_msg_type_number_t  count)          /* IN */
+int
+rtclock_init(void)
 {
-       spl_t           s;
-       int             freq;
-       int             adj;
-       clock_res_t     new_ires;
+       uint64_t        cycles;
 
-       if (count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
+       assert(!ml_get_interrupts_enabled());
 
-       case CLOCK_GET_TIME_RES:
-       case CLOCK_ALARM_MAXRES:
-       case CLOCK_ALARM_MINRES:
-               return (KERN_FAILURE);
+       if (cpu_number() == master_cpu) {
 
-       case CLOCK_ALARM_CURRES:
-               new_ires = *(clock_res_t *) attr;
+               assert(tscFreq);
 
                /*
-                * The new resolution must be within the predetermined
-                * range.  If the desired resolution cannot be achieved
-                * to within 0.1%, an error is returned.
+                * Adjust and set the exported cpu speed.
                 */
-               if (new_ires < RTC_MAXRES || new_ires > RTC_MINRES)
-                       return (KERN_INVALID_VALUE);
-               freq = (NSEC_PER_SEC / new_ires);
-               adj = (((clknum % freq) * new_ires) / clknum);
-               if (adj > (new_ires / 1000))
-                       return (KERN_INVALID_VALUE);
+               cycles = rtc_export_speed(tscFreq);
+
                /*
-                * Record the new alarm resolution which will take effect
-                * on the next HZ aligned clock tick.
+                * Set min/max to actual.
+                * ACPI may update these later if speed-stepping is detected.
                 */
-               LOCK_RTC(s);
-               if ( freq != rtc_intr_freq ) {
-                   rtclock.new_ires = new_ires;
-                   new_clknum = clknum;
-               }
-               UNLOCK_RTC(s);
-               return (KERN_SUCCESS);
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-}
-
-/*
- * Set next alarm time for the clock device. This call
- * always resets the time to deliver an alarm for the
- * clock.
- */
-void
-sysclk_setalarm(
-       mach_timespec_t *alarm_time)
-{
-       spl_t           s;
+               gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
+               gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
 
-       LOCK_RTC(s);
-       rtclock.alarm_time = *alarm_time;
-       RtcAlrm = &rtclock.alarm_time;
-       UNLOCK_RTC(s);
-}
+               rtc_timer_init();
+               clock_timebase_init();
+               ml_init_lock_timeout();
+               ml_init_delay_spin_threshold(10);
+       }
 
-/*
- * Configure the calendar clock.
- */
-int
-calend_config(void)
-{
-       return bbc_config();
-}
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->rtc_config();
+       rtc_timer_start();
 
-/*
- * Initialize calendar clock.
- */
-int
-calend_init(void)
-{
        return (1);
 }
 
-/*
- * Get the current clock time.
- */
-kern_return_t
-calend_gettime(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       spl_t           s;
+// Utility routine:
+// derive the TSC-to-nanoseconds scale factor and shift from the TSC frequency (cycles per second).
 
-       LOCK_RTC(s);
-       if (!rtclock.calend_is_set) {
-               UNLOCK_RTC(s);
-               return (KERN_FAILURE);
+static void
+rtc_set_timescale(uint64_t cycles)
+{
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint32_t    shift = 0;
+    
+       /* The 32-bit "scale" factor would overflow unless cycles > SLOW_TSC_THRESHOLD, so double cycles (counting the shifts) until it is. */
+    
+       while ( cycles <= SLOW_TSC_THRESHOLD) {
+               shift++;
+               cycles <<= 1;
        }
+       
+       rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);
+
+       rntp->shift = shift;
 
-       (void) sysclk_gettime_internal(cur_time);
-       ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset);
-       UNLOCK_RTC(s);
+       /*
+        * On some platforms, the TSC is not reset at warm boot. But the
+        * rebase time must be relative to the current boot so we can't use
+        * mach_absolute_time(). Instead, we convert the TSC delta since boot
+        * to nanoseconds.
+        */
+       if (tsc_rebase_abs_time == 0)
+               tsc_rebase_abs_time = _rtc_tsc_to_nanoseconds(
+                                               rdtsc64() - tsc_at_boot, rntp);
 
-       return (KERN_SUCCESS);
+       rtc_nanotime_init(0);
 }
 
-/*
- * Set the current clock time.
- */
-kern_return_t
-calend_settime(
-       mach_timespec_t *new_time)
+static uint64_t
+rtc_export_speed(uint64_t cyc_per_sec)
 {
-       mach_timespec_t curr_time;
-       spl_t           s;
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint64_t        cycles;
 
-       LOCK_RTC(s);
-       (void) sysclk_gettime_internal(&curr_time);
-       rtclock.calend_offset = *new_time;
-       SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-       rtclock.calend_is_set = TRUE;
-       UNLOCK_RTC(s);
+       if (rntp->shift != 0 )
+               printf("Slow TSC, rtc_nanotime.shift == %d\n", rntp->shift);
+    
+       /* Round to the nearest multiple of UI_CPUFREQ_ROUNDING_FACTOR: */
+        cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
+                       / UI_CPUFREQ_ROUNDING_FACTOR)
+                               * UI_CPUFREQ_ROUNDING_FACTOR;
 
-       (void) bbc_settime(new_time);
+       /*
+        * Set current measured speed.
+        */
+        if (cycles >= 0x100000000ULL) {
+            gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL;
+        } else {
+            gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles;
+        }
+        gPEClockFrequencyInfo.cpu_frequency_hz = cycles;
 
-       return (KERN_SUCCESS);
+       kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, cyc_per_sec);
+       return(cycles);
 }
 
-/*
- * Get clock device attributes.
- */
-kern_return_t
-calend_getattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
+void
+clock_get_system_microtime(
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
 {
-       spl_t   s;
-
-       if (*count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:        /* >0 res */
-#if    (NCPUS == 1 || (MP_V1_1 && 0))
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = 1000;
-               UNLOCK_RTC(s);
-               break;
-#else  /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = rtclock.intr_nsec;
-               UNLOCK_RTC(s);
-               break;
-#endif /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */
-
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-       case CLOCK_ALARM_MINRES:
-       case CLOCK_ALARM_MAXRES:
-               *(clock_res_t *) attr = 0;
-               break;
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-       return (KERN_SUCCESS);
+       uint64_t        now = rtc_nanotime_read();
+
+       _absolutetime_to_microtime(now, secs, microsecs);
 }
 
 void
-clock_adjust_calendar(
-       clock_res_t     nsec)
+clock_get_system_nanotime(
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
 {
-       spl_t           s;
+       uint64_t        now = rtc_nanotime_read();
 
-       LOCK_RTC(s);
-       if (rtclock.calend_is_set)
-               ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, nsec);
-       UNLOCK_RTC(s);
+       _absolutetime_to_nanotime(now, secs, nanosecs);
 }
 
 void
-clock_initialize_calendar(void)
+clock_gettimeofday_set_commpage(
+       uint64_t                                abstime,
+       uint64_t                                epoch,
+       uint64_t                                offset,
+       clock_sec_t                             *secs,
+       clock_usec_t                    *microsecs)
 {
-       mach_timespec_t bbc_time, curr_time;
-       spl_t           s;
-
-       if (bbc_gettime(&bbc_time) != KERN_SUCCESS)
-               return;
-
-       LOCK_RTC(s);
-       if (!rtclock.calend_is_set) {
-               (void) sysclk_gettime_internal(&curr_time);
-               rtclock.calend_offset = bbc_time;
-               SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-               rtclock.calend_is_set = TRUE;
-       }
-       UNLOCK_RTC(s);
-}
+       uint64_t        now = abstime + offset;
+       uint32_t        remain;
 
-mach_timespec_t
-clock_get_calendar_offset(void)
-{
-       mach_timespec_t result = MACH_TIMESPEC_ZERO;
-       spl_t           s;
+       remain = _absolutetime_to_microtime(now, secs, microsecs);
 
-       LOCK_RTC(s);
-       if (rtclock.calend_is_set)
-               result = rtclock.calend_offset;
-       UNLOCK_RTC(s);
+       *secs += (clock_sec_t)epoch;
 
-       return (result);
+       commpage_set_timestamp(abstime - remain, *secs);
 }
 
 void
 clock_timebase_info(
        mach_timebase_info_t    info)
 {
-       spl_t   s;
-
-       LOCK_RTC(s);
-       info->numer = info->denom = 1;
-       UNLOCK_RTC(s);
+       info->numer = info->denom =  1;
 }      
 
+/*
+ * Real-time clock device interrupt.
+ */
 void
-clock_set_timer_deadline(
-       uint64_t                        deadline)
-{
-       spl_t                   s;
-
-       LOCK_RTC(s);
-       rtclock.timer_deadline = deadline;
-       rtclock.timer_is_set = TRUE;
-       UNLOCK_RTC(s);
-}
-
-void
-clock_set_timer_func(
-       clock_timer_func_t              func)
+rtclock_intr(
+       x86_saved_state_t       *tregs)
 {
-       spl_t           s;
+        uint64_t       rip;
+       boolean_t       user_mode = FALSE;
 
-       LOCK_RTC(s);
-       if (rtclock.timer_expire == NULL)
-               rtclock.timer_expire = func;
-       UNLOCK_RTC(s);
-}
+       assert(get_preemption_level() > 0);
+       assert(!ml_get_interrupts_enabled());
 
-\f
+       if (is_saved_state64(tregs) == TRUE) {
+               x86_saved_state64_t     *regs;
+                 
+               regs = saved_state64(tregs);
 
-/*
- * Load the count register and start the clock.
- */
-#define RTCLOCK_RESET()        {                                       \
-       outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE);    \
-       outb(PITCTR0_PORT, (clks_per_int & 0xff));              \
-       outb(PITCTR0_PORT, (clks_per_int >> 8));                \
-}
+               if (regs->isf.cs & 0x03)
+                       user_mode = TRUE;
+               rip = regs->isf.rip;
+       } else {
+               x86_saved_state32_t     *regs;
 
-/*
- * Reset the clock device. This causes the realtime clock
- * device to reload its mode and count value (frequency).
- * Note: the CPU should be calibrated
- * before starting the clock for the first time.
- */
-
-void
-rtclock_reset(void)
-{
-       int             s;
+               regs = saved_state32(tregs);
 
-#if    NCPUS > 1 && !(MP_V1_1 && 0)
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return;
+               if (regs->cs & 0x03)
+                       user_mode = TRUE;
+               rip = regs->eip;
        }
-       mp_enable_preemption();
-#endif /* NCPUS > 1 && AT386 && !MP_V1_1 */
-       LOCK_RTC(s);
-       RTCLOCK_RESET();
-       UNLOCK_RTC(s);
+
+       /* call the generic etimer */
+       timer_intr(user_mode, rip);
 }
 
+
 /*
- * Real-time clock device interrupt. Called only on the
- * master processor. Updates the clock time and upcalls
- * into the higher level clock code to deliver alarms.
+ *     Request timer pop from the hardware 
  */
-int
-rtclock_intr(void)
-{
-       uint64_t                abstime;
-       mach_timespec_t clock_time;
-       int                             i;
-       spl_t                   s;
 
-       /*
-        * Update clock time. Do the update so that the macro
-        * MTS_TO_TS() for reading the mapped time works (e.g.
-        * update in order: mtv_csec, mtv_time.tv_nsec, mtv_time.tv_sec).
-        */      
-       LOCK_RTC(s);
-       rdtsc_hilo(&rtc_last_int_tsc_hi, &rtc_last_int_tsc_lo);
-       i = rtclock.time.tv_nsec + rtclock.intr_nsec;
-       if (i < NSEC_PER_SEC)
-           rtclock.time.tv_nsec = i;
-       else {
-           rtclock.time.tv_nsec = i - NSEC_PER_SEC;
-           rtclock.time.tv_sec++;
-       }
-       /* note time now up to date */
-       last_ival = 0;
-
-       rtclock.abstime += rtclock.intr_nsec;
-       abstime = rtclock.abstime;
-       if (    rtclock.timer_is_set                            &&
-                       rtclock.timer_deadline <= abstime               ) {
-               rtclock.timer_is_set = FALSE;
-               UNLOCK_RTC(s);
-
-               (*rtclock.timer_expire)(abstime);
+uint64_t
+setPop(uint64_t time)
+{
+       uint64_t        now;
+       uint64_t        pop;
 
-               LOCK_RTC(s);
+       /* 0 and EndOfAllTime are special-cases for "clear the timer" */
+       if (time == 0 || time == EndOfAllTime ) {
+               time = EndOfAllTime;
+               now = 0;
+               pop = rtc_timer->rtc_set(0, 0);
+       } else {
+               now = rtc_nanotime_read();      /* The time in nanoseconds */
+               pop = rtc_timer->rtc_set(time, now);
        }
 
-       /*
-        * Perform alarm clock processing if needed. The time
-        * passed up is incremented by a half-interrupt tick
-        * to trigger alarms closest to their desired times.
-        * The clock_alarm_intr() routine calls sysclk_setalrm()
-        * before returning if later alarms are pending.
-        */
+       /* Record requested and actual deadlines set */
+       x86_lcpu()->rtcDeadline = time;
+       x86_lcpu()->rtcPop      = pop;
 
-       if (RtcAlrm && (RtcAlrm->tv_sec < RtcTime->tv_sec ||
-                       (RtcAlrm->tv_sec == RtcTime->tv_sec &&
-                        RtcDelt >= RtcAlrm->tv_nsec - RtcTime->tv_nsec))) {
-               clock_time.tv_sec = 0;
-               clock_time.tv_nsec = RtcDelt;
-               ADD_MACH_TIMESPEC (&clock_time, RtcTime);
-               RtcAlrm = 0;
-               UNLOCK_RTC(s);
-               /*
-                * Call clock_alarm_intr() without RTC-lock.
-                * The lock ordering is always CLOCK-lock
-                * before RTC-lock.
-                */
-               clock_alarm_intr(SYSTEM_CLOCK, &clock_time);
-               LOCK_RTC(s);
-       }
-
-       /*
-        * On a HZ-tick boundary: return 0 and adjust the clock
-        * alarm resolution (if requested).  Otherwise return a
-        * non-zero value.
-        */
-       if ((i = --rtc_intr_count) == 0) {
-           if (rtclock.new_ires) {
-                       rtc_setvals(new_clknum, rtclock.new_ires);
-                       RTCLOCK_RESET();            /* lock clock register */
-                       rtclock.new_ires = 0;
-           }
-           rtc_intr_count = rtc_intr_hertz;
-       }
-       UNLOCK_RTC(s);
-       return (i);
+       return pop - now;
 }
 
-void
-clock_get_uptime(
-       uint64_t                *result)
+uint64_t
+mach_absolute_time(void)
 {
-       uint32_t                ticks;
-       spl_t                   s;
-
-       LOCK_RTC(s);
-       ticks = get_uptime_cycles();
-       *result = rtclock.abstime;
-       UNLOCK_RTC(s);
-
-       *result += ticks;
+       return rtc_nanotime_read();
 }
 
-void
-clock_interval_to_deadline(
-       uint32_t                interval,
-       uint32_t                scale_factor,
-       uint64_t                *result)
+uint64_t
+mach_approximate_time(void)
 {
-       uint64_t                abstime;
-
-       clock_get_uptime(result);
-
-       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
-
-       *result += abstime;
+       return rtc_nanotime_read();
 }
 
 void
@@ -1017,13 +511,21 @@ clock_interval_to_absolutetime_interval(
 }
 
 void
-clock_absolutetime_interval_to_deadline(
-       uint64_t                abstime,
-       uint64_t                *result)
+absolutetime_to_microtime(
+       uint64_t                        abstime,
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
 {
-       clock_get_uptime(result);
+       _absolutetime_to_microtime(abstime, secs, microsecs);
+}
 
-       *result += abstime;
+void
+nanotime_to_absolutetime(
+       clock_sec_t                     secs,
+       clock_nsec_t            nanosecs,
+       uint64_t                        *result)
+{
+       *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs;
 }
 
 void
@@ -1042,87 +544,13 @@ nanoseconds_to_absolutetime(
        *result = nanoseconds;
 }
 
-/*
- * measure_delay(microseconds)
- *
- * Measure elapsed time for delay calls
- * Returns microseconds.
- * 
- * Microseconds must not be too large since the counter (short) 
- * will roll over.  Max is about 13 ms.  Values smaller than 1 ms are ok.
- * This uses the assumed frequency of the rt clock which is emperically
- * accurate to only about 200 ppm.
- */
-
-int
-measure_delay(
-       int us)
-{
-       unsigned int    lsb, val;
-
-       outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE);
-       outb(PITCTR0_PORT, 0xff);       /* set counter to max value */
-       outb(PITCTR0_PORT, 0xff);
-       delay(us);
-       outb(PITCTL_PORT, PIT_C0);
-       lsb = inb(PITCTR0_PORT);
-       val = (inb(PITCTR0_PORT) << 8) | lsb;
-       val = 0xffff - val;
-       val *= 1000000;
-       val /= CLKNUM;
-       return(val);
-}
-
-/*
- * calibrate_delay(void)
- *
- * Adjust delaycount.  Called from startup before clock is started
- * for normal interrupt generation.
- */
-
 void
-calibrate_delay(void)
+machine_delay_until(
+       uint64_t interval,
+       uint64_t                deadline)
 {
-       unsigned        val;
-       int             prev = 0;
-       register int    i;
-
-       printf("adjusting delay count: %d", delaycount);
-       for (i=0; i<10; i++) {
-               prev = delaycount;
-               /* 
-                * microdata must not be too large since measure_timer
-                * will not return accurate values if the counter (short) 
-                * rolls over
-                */
-               val = measure_delay(microdata);
-               if (val == 0) {
-                 delaycount *= 2;
-               } else {
-               delaycount *= microdata;
-               delaycount += val-1;    /* round up to upper us */
-               delaycount /= val;
-               }
-               if (delaycount <= 0)
-                       delaycount = 1;
-               if (delaycount != prev)
-                       printf(" %d", delaycount);
-       }
-       printf("\n");
-}
-
-#if    MACH_KDB
-void
-test_delay(void);
-
-void
-test_delay(void)
-{
-       register i;
-
-       for (i = 0; i < 10; i++)
-               printf("%d, %d\n", i, measure_delay(i));
-       for (i = 10; i <= 100; i+=10)
-               printf("%d, %d\n", i, measure_delay(i));
+       (void)interval;
+       while (mach_absolute_time() < deadline) {
+               cpu_pause();
+       } 
 }
-#endif /* MACH_KDB */