]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/rtclock.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / osfmk / i386 / rtclock.c
index f592c75445c6bfac29c28f26ac328725022769fc..bc6fa6524b9d48e82c5213f8d69cb71d462815ab 100644 (file)
@@ -1,17 +1,20 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
- * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -19,8 +22,8 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
 /*
  *     File:           i386/rtclock.c
  *     Purpose:        Routines for handling the machine dependent
- *                     real-time clock. This clock is generated by
- *                     the Intel 8254 Programmable Interval Timer.
+ *                     real-time clock. Historically, this clock is
+ *                     generated by the Intel 8254 Programmable Interval
+ *                     Timer, but local apic timers are now used for
+ *                     this purpose with the master time reference being
+ *                     the cpu clock counted by the timestamp MSR.
  */
 
-#include <cpus.h>
-#include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/mach_types.h>
 
-#include <kern/cpu_number.h>
 #include <kern/cpu_data.h>
+#include <kern/cpu_number.h>
 #include <kern/clock.h>
 #include <kern/host_notify.h>
 #include <kern/macro_help.h>
 #include <kern/misc_protos.h>
 #include <kern/spl.h>
-#include <machine/mach_param.h>        /* HZ */
+#include <kern/assert.h>
+#include <kern/timer_queue.h>
 #include <mach/vm_prot.h>
 #include <vm/pmap.h>
-#include <vm/vm_kern.h>                /* for kernel_map */
-#include <i386/ipl.h>
-#include <i386/pit.h>
-#include <i386/pio.h>
-#include <i386/misc_protos.h>
-#include <i386/rtclock_entries.h>
-#include <i386/hardclock_entries.h>
-#include <i386/proc_reg.h>
+#include <vm/vm_kern.h>         /* for kernel_map */
+#include <architecture/i386/pio.h>
 #include <i386/machine_cpu.h>
+#include <i386/cpuid.h>
+#include <i386/cpu_threads.h>
+#include <i386/mp.h>
+#include <i386/machine_routines.h>
+#include <i386/pal_routines.h>
+#include <i386/proc_reg.h>
+#include <i386/misc_protos.h>
 #include <pexpert/pexpert.h>
+#include <machine/limits.h>
+#include <machine/commpage.h>
+#include <sys/kdebug.h>
+#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
+#define UI_CPUFREQ_ROUNDING_FACTOR      10000000
 
-#define DISPLAYENTER(x) printf("[RTCLOCK] entering " #x "\n");
-#define DISPLAYEXIT(x) printf("[RTCLOCK] leaving " #x "\n");
-#define DISPLAYVALUE(x,y) printf("[RTCLOCK] " #x ":" #y " = 0x%08x \n",y);
-
-int            sysclk_config(void);
-
-int            sysclk_init(void);
-
-kern_return_t  sysclk_gettime(
-       mach_timespec_t                 *cur_time);
-
-kern_return_t  sysclk_getattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  *count);
-
-kern_return_t  sysclk_setattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  count);
-
-void           sysclk_setalarm(
-       mach_timespec_t                 *alarm_time);
-
-extern void (*IOKitRegisterInterruptHook)(void *,  int irq, int isclock);
-
-/*
- * Lists of clock routines.
- */
-struct clock_ops  sysclk_ops = {
-       sysclk_config,                  sysclk_init,
-       sysclk_gettime,                 0,
-       sysclk_getattr,                 sysclk_setattr,
-       sysclk_setalarm,
-};
-
-int            calend_config(void);
-
-int            calend_init(void);
-
-kern_return_t  calend_gettime(
-       mach_timespec_t                 *cur_time);
-
-kern_return_t  calend_getattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  *count);
-
-struct clock_ops calend_ops = {
-       calend_config,                  calend_init,
-       calend_gettime,                 0,
-       calend_getattr,                 0,
-       0,
-};
-
-/* local data declarations */
-mach_timespec_t                *RtcTime = (mach_timespec_t *)0;
-mach_timespec_t                *RtcAlrm;
-clock_res_t                    RtcDelt;
-
-/* global data declarations */
-struct {
-       uint64_t                        abstime;
-
-       mach_timespec_t         time;
-       mach_timespec_t         alarm_time;     /* time of next alarm */
-
-       mach_timespec_t         calend_offset;
-       boolean_t                       calend_is_set;
-
-       int64_t                         calend_adjtotal;
-       int32_t                         calend_adjdelta;
-
-       uint64_t                        timer_deadline;
-       boolean_t                       timer_is_set;
-       clock_timer_func_t      timer_expire;
-
-       clock_res_t                     new_ires;       /* pending new resolution (nano ) */
-       clock_res_t                     intr_nsec;      /* interrupt resolution (nano) */
-        mach_timebase_info_data_t      timebase_const;
-
-       decl_simple_lock_data(,lock)    /* real-time clock device lock */
-} rtclock;
-
-unsigned int           clknum;                        /* clks per second */
-unsigned int           new_clknum;                    /* pending clknum */
-unsigned int           time_per_clk;                  /* time per clk in ZHZ */
-unsigned int           clks_per_int;                  /* clks per interrupt */
-unsigned int           clks_per_int_99;
-int                    rtc_intr_count;                /* interrupt counter */
-int                    rtc_intr_hertz;                /* interrupts per HZ */
-int                    rtc_intr_freq;                 /* interrupt frequency */
-int                    rtc_print_lost_tick;           /* print lost tick */
-
-uint32_t               rtc_cyc_per_sec;                /* processor cycles per seconds */
-uint32_t               rtc_quant_scale;                /* used internally to convert clocks to nanos */
-
-/*
- *     Macros to lock/unlock real-time clock device.
- */
-#define LOCK_RTC(s)                                    \
-MACRO_BEGIN                                                    \
-       (s) = splclock();                               \
-       simple_lock(&rtclock.lock);             \
-MACRO_END
-
-#define UNLOCK_RTC(s)                          \
-MACRO_BEGIN                                                    \
-       simple_unlock(&rtclock.lock);   \
-       splx(s);                                                \
-MACRO_END
-
-/*
- * i8254 control.  ** MONUMENT **
- *
- * The i8254 is a traditional PC device with some arbitrary characteristics.
- * Basically, it is a register that counts at a fixed rate and can be
- * programmed to generate an interrupt every N counts.  The count rate is
- * clknum counts per second (see pit.h), historically 1193167 we believe.
- * Various constants are computed based on this value, and we calculate
- * them at init time for execution efficiency.  To obtain sufficient
- * accuracy, some of the calculation are most easily done in floating
- * point and then converted to int.
- *
- * We want an interrupt every 10 milliseconds, approximately.  The count
- * which will do that is clks_per_int.  However, that many counts is not
- * *exactly* 10 milliseconds; it is a bit more or less depending on
- * roundoff.  The actual time per tick is calculated and saved in
- * rtclock.intr_nsec, and it is that value which is added to the time
- * register on each tick.
- *
- * The i8254 counter can be read between interrupts in order to determine
- * the time more accurately.  The counter counts down from the preset value
- * toward 0, and we have to handle the case where the counter has been
- * reset just before being read and before the interrupt has been serviced.
- * Given a count since the last interrupt, the time since then is given
- * by (count * time_per_clk).  In order to minimize integer truncation,
- * we perform this calculation in an arbitrary unit of time which maintains
- * the maximum precision, i.e. such that one tick is 1.0e9 of these units,
- * or close to the precision of a 32-bit int.  We then divide by this unit
- * (which doesn't lose precision) to get nanoseconds.  For notation
- * purposes, this unit is defined as ZHZ = zanoseconds per nanosecond.
- *
- * This sequence to do all this is in sysclk_gettime.  For efficiency, this
- * sequence also needs the value that the counter will have if it has just
- * overflowed, so we precompute that also.  
- *
- * The fix for certain really old certain platforms has been removed
- * (specifically the DEC XL5100) have been observed to have problem
- * with latching the counter, and they occasionally (say, one out of
- * 100,000 times) return a bogus value.  Hence, the present code reads
- * the counter twice and checks for a consistent pair of values.
- * the code was:
- *     do {
- *         READ_8254(val);                
- *         READ_8254(val2);                
- *     } while ( val2 > val || val2 < val - 10 );
- *
- *
- * Some attributes of the rt clock can be changed, including the
- * interrupt resolution.  We default to the minimum resolution (10 ms),
- * but allow a finer resolution to be requested.  The assumed frequency
- * of the clock can also be set since it appears that the actual
- * frequency of real-world hardware can vary from the nominal by
- * 200 ppm or more.  When the frequency is set, the values above are
- * recomputed and we continue without resetting or changing anything else.
- */
-#define RTC_MINRES     (NSEC_PER_SEC / HZ)     /* nsec per tick */
-#define        RTC_MAXRES      (RTC_MINRES / 20)       /* nsec per tick */
-#define        ZANO            (1000000000)
-#define ZHZ             (ZANO / (NSEC_PER_SEC / HZ))
-#define READ_8254(val) { \
-        outb(PITCTL_PORT, PIT_C0);             \
-       (val) = inb(PITCTR0_PORT);               \
-       (val) |= inb(PITCTR0_PORT) << 8 ; }
-
-#define UI_CPUFREQ_ROUNDING_FACTOR     10000000
-
-
-/*
- * Forward decl.
- */
+int             rtclock_init(void);
 
-void         rtc_setvals( unsigned int, clock_res_t );
+uint64_t        tsc_rebase_abs_time = 0;
 
-static void  rtc_set_cyc_per_sec();
+static void     rtc_set_timescale(uint64_t cycles);
+static uint64_t rtc_export_speed(uint64_t cycles);
 
-/* define assembly routines */
-
-
-/*
- * Inlines to get timestamp counter value.
- */
-
-inline static uint64_t
-rdtsc_64(void)
+void
+rtc_timer_start(void)
 {
-       uint64_t result;
-        asm volatile("rdtsc": "=A" (result));
-       return result;
+       /*
+        * Force a complete re-evaluation of timer deadlines.
+        */
+       x86_lcpu()->rtcDeadline = EndOfAllTime;
+       timer_resync_deadlines();
 }
 
-// create_mul_quant_GHZ create a constant that can be used to multiply
-// the TSC by to create nanoseconds. This is a 32 bit number
-// and the TSC *MUST* have a frequency higher than 1000Mhz for this routine to work
-//
-// The theory here is that we know how many TSCs-per-sec the processor runs at. Normally to convert this
-// to nanoseconds you would multiply the current time stamp by 1000000000 (a billion) then divide
-// by TSCs-per-sec to get nanoseconds. Unfortunatly the TSC is 64 bits which would leave us with
-// 96 bit intermediate results from the dultiply that must be divided by.
-// usually thats
-// uint96 = tsc * numer
-// nanos = uint96 / denom
-// Instead, we create this quant constant and it becomes the numerator, the denominator
-// can then be 0x100000000 which makes our division as simple as forgetting the lower 32 bits
-// of the result. We can also pass this number to user space as the numer and pass 0xFFFFFFFF
-// as the denom to converting raw counts to nanos. the difference is so small as to be undetectable
-// by anything.
-// unfortunatly we can not do this for sub GHZ processors. In that case, all we do is pass the CPU
-// speed in raw as the denom and we pass in 1000000000 as the numerator. No short cuts allowed
-
-inline static uint32_t
-create_mul_quant_GHZ(uint32_t quant)
+static inline uint32_t
+_absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs)
 {
-       return (uint32_t)((50000000ULL << 32) / quant);
+       uint32_t remain;
+       *secs = abstime / (uint64_t)NSEC_PER_SEC;
+       remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC);
+       *microsecs = remain / NSEC_PER_USEC;
+       return remain;
 }
 
-// this routine takes a value of raw TSC ticks and applies the passed mul_quant
-// generated by create_mul_quant() This is our internal routine for creating
-// nanoseconds
-// since we don't really have uint96_t this routine basically does this....
-// uint96_t intermediate = (*value) * scale
-// return (intermediate >> 32)
-inline static uint64_t
-fast_get_nano_from_abs(uint64_t value, int scale)
+static inline void
+_absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs)
 {
-    asm ("     movl    %%edx,%%esi     \n\t"
-         "      mull   %%ecx           \n\t"
-         "      movl   %%edx,%%edi     \n\t"
-         "      movl   %%esi,%%eax     \n\t"
-         "      mull   %%ecx           \n\t"
-         "      xorl   %%ecx,%%ecx     \n\t"   
-         "      addl   %%edi,%%eax     \n\t"   
-         "      adcl   %%ecx,%%edx         "
-               : "+A" (value)
-               : "c" (scale)
-               : "%esi", "%edi");
-    return value;
+       *secs = abstime / (uint64_t)NSEC_PER_SEC;
+       *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC);
 }
 
 /*
- * this routine basically does this...
- * ts.tv_sec = nanos / 1000000000;     create seconds
- * ts.tv_nsec = nanos % 1000000000;    create remainder nanos
+ * Nanotime/mach_absolute_time
+ * ---------------------------
+ * The timestamp counter (TSC) - which counts cpu clock cycles and can be read
+ * efficiently by the kernel and in userspace - is the reference for all timing.
+ * The cpu clock rate is platform-dependent and may stop or be reset when the
+ * processor is napped/slept.  As a result, nanotime is the software abstraction
+ * used to maintain a monotonic clock, adjusted from an outside reference as needed.
+ *
+ * The kernel maintains nanotime information recording:
+ *      - the ratio of tsc to nanoseconds
+ *       with this ratio expressed as a 32-bit scale and shift
+ *       (power of 2 divider);
+ *     - { tsc_base, ns_base } pair of corresponding timestamps.
+ *
+ * The tuple {tsc_base, ns_base, scale, shift} is exported in the commpage
+ * for the userspace nanotime routine to read.
+ *
+ * All of the routines which update the nanotime data are non-reentrant.  This must
+ * be guaranteed by the caller.
  */
-inline static mach_timespec_t 
-nanos_to_timespec(uint64_t nanos)
-{
-       union {
-               mach_timespec_t ts;
-               uint64_t u64;
-       } ret;
-        ret.u64 = nanos;
-        asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC));
-        return ret.ts;
-}
-
-// the following two routine perform the 96 bit arithmetic we need to
-// convert generic absolute<->nanoseconds
-// the multiply routine takes a uint64_t and a uint32_t and returns the result in a
-// uint32_t[3] array. the dicide routine takes this uint32_t[3] array and 
-// divides it by a uint32_t returning a uint64_t
-inline static void
-longmul(uint64_t       *abstime, uint32_t multiplicand, uint32_t *result)
-{
-    asm volatile(
-        " pushl        %%ebx                   \n\t"   
-        " movl %%eax,%%ebx             \n\t"
-        " movl (%%eax),%%eax           \n\t"
-        " mull %%ecx                   \n\t"
-        " xchg %%eax,%%ebx             \n\t"
-        " pushl        %%edx                   \n\t"
-        " movl 4(%%eax),%%eax          \n\t"
-        " mull %%ecx                   \n\t"
-        " movl %2,%%ecx                \n\t"
-        " movl %%ebx,(%%ecx)           \n\t"
-        " popl %%ebx                   \n\t"
-        " addl %%ebx,%%eax             \n\t"
-        " popl %%ebx                   \n\t"
-        " movl %%eax,4(%%ecx)          \n\t"
-        " adcl $0,%%edx                \n\t"
-        " movl %%edx,8(%%ecx)  // and save it"
-         : : "a"(abstime), "c"(multiplicand), "m"(result));
-    
-}
-
-inline static uint64_t
-longdiv(uint32_t *numer, uint32_t denom)
-{
-    uint64_t   result;
-    asm volatile(
-        " pushl        %%ebx                   \n\t"
-        " movl %%eax,%%ebx             \n\t"
-        " movl 8(%%eax),%%edx          \n\t"
-        " movl 4(%%eax),%%eax          \n\t"
-        " divl %%ecx                   \n\t"
-        " xchg %%ebx,%%eax             \n\t"
-        " movl (%%eax),%%eax           \n\t"
-        " divl %%ecx                   \n\t"
-        " xchg %%ebx,%%edx             \n\t"
-        " popl %%ebx                   \n\t"
-        : "=A"(result) : "a"(numer),"c"(denom));
-    return result;
-}
-
-#define PIT_Mode4      0x08            /* turn on mode 4 one shot software trigger */
-
-// Enable or disable timer 2.
-inline static void
-enable_PIT2()
-{
-    asm volatile(
-        " inb   $97,%%al        \n\t"
-        " and   $253,%%al       \n\t"
-        " or    $1,%%al         \n\t"
-        " outb  %%al,$97        \n\t"
-      : : : "%al" );
-}
-
-inline static void
-disable_PIT2()
-{
-    asm volatile(
-        " inb   $97,%%al        \n\t"
-        " and   $253,%%al       \n\t"
-        " outb  %%al,$97        \n\t"
-        : : : "%al" );
-}
-
-// ctimeRDTSC() routine sets up counter 2 to count down 1/20 of a second
-// it pauses until the value is latched in the counter
-// and then reads the time stamp counter to return to the caller
-// utility routine 
-// Code to calculate how many processor cycles are in a second...
-inline static void
-set_PIT2(int value)
+static inline void
+rtc_nanotime_set_commpage(pal_rtc_nanotime_t *rntp)
 {
-// first, tell the clock we are going to write 16 bytes to the counter and enable one-shot mode
-// then write the two bytes into the clock register.
-// loop until the value is "realized" in the clock, this happens on the next tick
-//
-    asm volatile(
-        " movb  $184,%%al       \n\t"
-        " outb %%al,$67        \n\t"
-        " movb %%dl,%%al       \n\t"
-        " outb %%al,$66        \n\t"
-        " movb %%dh,%%al       \n\t"
-        " outb %%al,$66        \n"
-"1:      inb   $66,%%al        \n\t" 
-        " inb  $66,%%al        \n\t"
-        " cmp  %%al,%%dh       \n\t"
-        " jne  1b"
-         : : "d"(value) : "%al");
+       commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift);
 }
 
-inline static uint64_t
-get_PIT2(unsigned int *value)
-{
-// this routine first latches the time, then gets the time stamp so we know 
-// how long the read will take later. Reads
-    register uint64_t  result;
-    asm volatile(
-        " xorl %%ecx,%%ecx     \n\t"
-        " movb $128,%%al       \n\t"
-        " outb %%al,$67        \n\t"
-        " rdtsc                        \n\t"
-        " pushl        %%eax           \n\t"
-        " inb  $66,%%al        \n\t"
-        " movb %%al,%%cl       \n\t"
-        " inb  $66,%%al        \n\t"
-        " movb %%al,%%ch       \n\t"
-        " popl %%eax   "
-         : "=A"(result), "=c"(*value));
-        return result;
-}
-
-static uint32_t
-timeRDTSC(void)
-{
-    uint64_t   latchTime;
-    uint64_t   saveTime,intermediate;
-    unsigned int timerValue,x;
-    boolean_t   int_enabled;
-    uint64_t   fact[6] = { 2000011734ll,
-                            2000045259ll,
-                            2000078785ll,
-                            2000112312ll,
-                            2000145841ll,
-                            2000179371ll};
-                            
-    int_enabled = ml_set_interrupts_enabled(FALSE);
-    
-    enable_PIT2();      // turn on PIT2
-    set_PIT2(0);       // reset timer 2 to be zero
-    latchTime = rdtsc_64();    // get the time stamp to time 
-    latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
-    set_PIT2(59658);   // set up the timer to count 1/20th a second
-    saveTime = rdtsc_64();     // now time how ling a 20th a second is...
-    get_PIT2(&x);
-    do { get_PIT2(&timerValue); x = timerValue;} while (timerValue > x);
-    do {
-        intermediate = get_PIT2(&timerValue);
-        if (timerValue>x) printf("Hey we are going backwards! %d, %d\n",timerValue,x);
-        x = timerValue;
-    } while ((timerValue != 0) && (timerValue >5));
-    printf("Timer value:%d\n",timerValue);
-    printf("intermediate 0x%08x:0x%08x\n",intermediate);
-    printf("saveTime 0x%08x:0x%08x\n",saveTime);
-    
-    intermediate = intermediate - saveTime;    // raw # of tsc's it takes for about 1/20 second
-    intermediate = intermediate * fact[timerValue]; // actual time spent
-    intermediate = intermediate / 2000000000ll; // rescale so its exactly 1/20 a second
-    intermediate = intermediate + latchTime; // add on our save fudge
-    set_PIT2(0);       // reset timer 2 to be zero
-    disable_PIT2(0);    // turn off PIT 2
-    ml_set_interrupts_enabled(int_enabled);
-    return intermediate;
-}
-
-static uint64_t
-rdtsctime_to_nanoseconds( void )
+/*
+ * rtc_nanotime_init:
+ *
+ * Initialize the nanotime info from the base time.
+ */
+static inline void
+_rtc_nanotime_init(pal_rtc_nanotime_t *rntp, uint64_t base)
 {
-        uint32_t       numer;
-        uint32_t       denom;
-        uint64_t       abstime;
-
-        uint32_t       intermediate[3];
-        
-        numer = rtclock.timebase_const.numer;
-        denom = rtclock.timebase_const.denom;
-        abstime = rdtsc_64();
-        if (denom == 0xFFFFFFFF) {
-            abstime = fast_get_nano_from_abs(abstime, numer);
-        } else {
-            longmul(&abstime, numer, intermediate);
-            abstime = longdiv(intermediate, denom);
-        }
-        return abstime;
-}
+       uint64_t        tsc = rdtsc64();
 
-inline static mach_timespec_t 
-rdtsc_to_timespec(void)
-{
-        uint64_t       currNanos;
-        currNanos = rdtsctime_to_nanoseconds();
-        return nanos_to_timespec(currNanos);
+       _pal_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp);
 }
 
-/*
- * Initialize non-zero clock structure values.
- */
 void
-rtc_setvals(
-       unsigned int new_clknum,
-       clock_res_t  new_ires
-       )
+rtc_nanotime_init(uint64_t base)
 {
-    unsigned int timeperclk;
-    unsigned int scale0;
-    unsigned int scale1;
-    unsigned int res;
-
-    clknum = new_clknum;
-    rtc_intr_freq = (NSEC_PER_SEC / new_ires);
-    rtc_intr_hertz = rtc_intr_freq / HZ;
-    clks_per_int = (clknum + (rtc_intr_freq / 2)) / rtc_intr_freq;
-    clks_per_int_99 = clks_per_int - clks_per_int/100;
-
-    /*
-     * The following calculations are done with scaling integer operations
-     * in order that the integer results are accurate to the lsb.
-     */
-    timeperclk = div_scale(ZANO, clknum, &scale0);     /* 838.105647 nsec */
-
-    time_per_clk = mul_scale(ZHZ, timeperclk, &scale1);        /* 83810 */
-    if (scale0 > scale1)
-       time_per_clk >>= (scale0 - scale1);
-    else if (scale0 < scale1)
-       panic("rtc_clock: time_per_clk overflow\n");
-
-    /*
-     * Notice that rtclock.intr_nsec is signed ==> use unsigned int res
-     */
-    res = mul_scale(clks_per_int, timeperclk, &scale1);        /* 10000276 */
-    if (scale0 > scale1)
-       rtclock.intr_nsec = res >> (scale0 - scale1);
-    else
-       panic("rtc_clock: rtclock.intr_nsec overflow\n");
-
-    rtc_intr_count = 1;
-    RtcDelt = rtclock.intr_nsec/2;
+       _rtc_nanotime_init(&pal_rtc_nanotime_info, base);
+       rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
 }
 
 /*
- * Configure the real-time clock device. Return success (1)
- * or failure (0).
+ * rtc_nanotime_init_commpage:
+ *
+ * Call back from the commpage initialization to
+ * cause the commpage data to be filled in once the
+ * commpages have been created.
  */
-
-int
-sysclk_config(void)
+void
+rtc_nanotime_init_commpage(void)
 {
-       int     RtcFlag;
-       int     pic;
-
-#if    NCPUS > 1
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return(1);
-       }
-       mp_enable_preemption();
-#endif
-       /*
-        * Setup device.
-        */
-       pic = 0;        /* FIXME .. interrupt registration moved to AppleIntelClock */
-
-
-       /*
-        * We should attempt to test the real-time clock
-        * device here. If it were to fail, we should panic
-        * the system.
-        */
-       RtcFlag = /* test device */1;
-       printf("realtime clock configured\n");
+       spl_t                   s = splclock();
 
-       simple_lock_init(&rtclock.lock, ETAP_NO_TRACE);
-       return (RtcFlag);
+       rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
+       splx(s);
 }
 
 /*
- * Initialize the real-time clock device. Return success (1)
- * or failure (0). Since the real-time clock is required to
- * provide canonical mapped time, we allocate a page to keep
- * the clock time value. In addition, various variables used
- * to support the clock are initialized.  Note: the clock is
- * not started until rtclock_reset is called.
+ * rtc_nanotime_read:
+ *
+ * Returns the current nanotime value, accessible from any
+ * context.
  */
-int
-sysclk_init(void)
+static inline uint64_t
+rtc_nanotime_read(void)
 {
-       vm_offset_t     *vp;
-#if    NCPUS > 1
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return(1);
-       }
-       mp_enable_preemption();
-#endif
-
-       RtcTime = &rtclock.time;
-       rtc_setvals( CLKNUM, RTC_MINRES );  /* compute constants */
-       rtc_set_cyc_per_sec();  /* compute number of tsc beats per second */
-       clock_timebase_init();
-       return (1);
+       return _rtc_nanotime_read(&pal_rtc_nanotime_info);
 }
 
-static volatile unsigned int     last_ival = 0;
-
 /*
- * Get the clock device time. This routine is responsible
- * for converting the device's machine dependent time value
- * into a canonical mach_timespec_t value.
+ * rtc_clock_napped:
+ *
+ * Invoked from power management when we exit from a low C-State (>= C4)
+ * and the TSC has stopped counting.  The nanotime data is updated according
+ * to the provided value which represents the new value for nanotime.
  */
-kern_return_t
-sysclk_gettime(
-       mach_timespec_t *cur_time)      /* OUT */
+void
+rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 {
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return (KERN_SUCCESS);
-       }
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint64_t        oldnsecs;
+       uint64_t        newnsecs;
+       uint64_t        tsc;
 
-        *cur_time = rdtsc_to_timespec();
-       return (KERN_SUCCESS);
-}
+       assert(!ml_get_interrupts_enabled());
+       tsc = rdtsc64();
+       oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp);
+       newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp);
 
-kern_return_t
-sysclk_gettime_internal(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return (KERN_SUCCESS);
+       /*
+        * Only update the base values if time using the new base values
+        * is later than the time using the old base values.
+        */
+       if (oldnsecs < newnsecs) {
+               _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+               rtc_nanotime_set_commpage(rntp);
        }
-        *cur_time = rdtsc_to_timespec();
-       return (KERN_SUCCESS);
 }
 
 /*
- * Get the clock device time when ALL interrupts are already disabled.
- * Same as above except for turning interrupts off and on.
- * This routine is responsible for converting the device's machine dependent
- * time value into a canonical mach_timespec_t value.
+ * Invoked from power management to correct the SFLM TSC entry drift problem:
+ * a small delta is added to the tsc_base.  This is equivalent to nudging time
+ * backwards.  We require this to be on the order of a TSC quantum which won't
+ * cause callers of mach_absolute_time() to see time going backwards!
  */
 void
-sysclk_gettime_interrupts_disabled(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return;
-       }
-        *cur_time = rdtsc_to_timespec();
-}
-
-// utility routine 
-// Code to calculate how many processor cycles are in a second...
-
-static void
-rtc_set_cyc_per_sec() 
+rtc_clock_adjust(uint64_t tsc_base_delta)       /* delta is asserted below to be < 100 */
 {
+       pal_rtc_nanotime_t  *rntp = &pal_rtc_nanotime_info;     /* nanotime info handed to both helpers below */
 
-        uint32_t twen_cycles;
-        uint32_t cycles;
-
-        twen_cycles = timeRDTSC();
-        if (twen_cycles> (1000000000/20)) {
-            // we create this value so that you can use just a "fast" multiply to get nanos
-            rtc_quant_scale = create_mul_quant_GHZ(twen_cycles);
-            rtclock.timebase_const.numer = rtc_quant_scale;    // because ctimeRDTSC gives us 1/20 a seconds worth
-            rtclock.timebase_const.denom = 0xffffffff; // so that nanoseconds = (TSC * numer) / denom
-        
-        } else {
-            rtclock.timebase_const.numer = 1000000000/20;      // because ctimeRDTSC gives us 1/20 a seconds worth
-            rtclock.timebase_const.denom = twen_cycles;        // so that nanoseconds = (TSC * numer) / denom
-        }
-        cycles = twen_cycles;          // number of cycles in 1/20th a second
-       rtc_cyc_per_sec = cycles*20;    // multiply it by 20 and we are done.. BUT we also want to calculate...
-
-        cycles = ((rtc_cyc_per_sec + UI_CPUFREQ_ROUNDING_FACTOR - 1) / UI_CPUFREQ_ROUNDING_FACTOR) * UI_CPUFREQ_ROUNDING_FACTOR;
-        gPEClockFrequencyInfo.cpu_clock_rate_hz = cycles;
-DISPLAYVALUE(rtc_set_cyc_per_sec,rtc_cyc_per_sec);
-DISPLAYEXIT(rtc_set_cyc_per_sec);
+       assert(!ml_get_interrupts_enabled());   /* caller must have interrupts disabled */
+       assert(tsc_base_delta < 100ULL); /* i.e. it's small */
+       _rtc_nanotime_adjust(tsc_base_delta, rntp);     /* helper not visible here; presumably applies the delta to the TSC base */
+       rtc_nanotime_set_commpage(rntp);        /* then hand the same info to the commpage setter */
 }
 
+/*
+ * rtc_sleep_wakeup:
+ *
+ * Invoked from power management when we have awoken from a sleep (S3)
+ * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC
+ * has progressed.  The nanotime data is updated based on the passed-in value.
+ *
+ * The caller must guarantee non-reentrancy.
+ */
 void
-clock_get_system_microtime(
-       uint32_t                        *secs,
-       uint32_t                        *microsecs)
+rtc_sleep_wakeup(
+       uint64_t                base)   /* forwarded unchanged to rtc_nanotime_init() */
 {
-       mach_timespec_t         now;
-
-       sysclk_gettime(&now);
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->rtc_config();        /* same hook that rtclock_init() and rtc_decrementer_configure() call */
 
-       *secs = now.tv_sec;
-       *microsecs = now.tv_nsec / NSEC_PER_USEC;
+       /*
+        * Reset nanotime.
+        * The timestamp counter will have been reset
+        * but nanotime (uptime) marches onward.
+        */
+       rtc_nanotime_init(base);        /* re-initialize nanotime with the caller-supplied base */
 }
 
 void
-clock_get_system_nanotime(
-       uint32_t                        *secs,
-       uint32_t                        *nanosecs)
+rtc_decrementer_configure(void)        /* thin wrapper: its only action is the rtc_config() call below */
 {
-       mach_timespec_t         now;
-
-       sysclk_gettime(&now);
-
-       *secs = now.tv_sec;
-       *nanosecs = now.tv_nsec;
+       rtc_timer->rtc_config();        /* invoke the rtc_config hook of the timer rtc_timer points at */
 }
-
 /*
- * Get clock device attributes.
+ * rtclock_early_init() is called very early at boot to
+ * establish mach_absolute_time() and set it to zero.
  */
-kern_return_t
-sysclk_getattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
+void
+rtclock_early_init(void)
 {
-       spl_t   s;
-
-       if (*count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:        /* >0 res */
-#if    (NCPUS == 1)
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = 1000;
-               UNLOCK_RTC(s);
-               break;
-#endif /* (NCPUS == 1) */
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = rtclock.intr_nsec;
-               UNLOCK_RTC(s);
-               break;
-
-       case CLOCK_ALARM_MAXRES:
-               *(clock_res_t *) attr = RTC_MAXRES;
-               break;
-
-       case CLOCK_ALARM_MINRES:
-               *(clock_res_t *) attr = RTC_MINRES;
-               break;
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-       return (KERN_SUCCESS);
+       assert(tscFreq);        /* tscFreq must already be non-zero; it is the divisor in rtc_set_timescale() */
+       rtc_set_timescale(tscFreq);     /* computes scale/shift, then calls rtc_nanotime_init(0) */
 }
 
 /*
- * Set clock device attributes.
+ * Initialize the real-time clock device.
+ * In addition, various variables used to support the clock are initialized.
  */
-kern_return_t
-sysclk_setattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* IN */
-       mach_msg_type_number_t  count)          /* IN */
+int
+rtclock_init(void)
 {
-       spl_t           s;
-       int             freq;
-       int             adj;
-       clock_res_t     new_ires;
-
-       if (count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
+       uint64_t        cycles; /* rounded speed returned by rtc_export_speed() */
 
-       case CLOCK_GET_TIME_RES:
-       case CLOCK_ALARM_MAXRES:
-       case CLOCK_ALARM_MINRES:
-               return (KERN_FAILURE);
+       assert(!ml_get_interrupts_enabled());   /* must be entered with interrupts disabled */
 
-       case CLOCK_ALARM_CURRES:
-               new_ires = *(clock_res_t *) attr;
+       if (cpu_number() == master_cpu) {       /* the setup in this branch runs only on the master cpu */
+               assert(tscFreq);
 
                /*
-                * The new resolution must be within the predetermined
-                * range.  If the desired resolution cannot be achieved
-                * to within 0.1%, an error is returned.
+                * Adjust and set the exported cpu speed.
                 */
-               if (new_ires < RTC_MAXRES || new_ires > RTC_MINRES)
-                       return (KERN_INVALID_VALUE);
-               freq = (NSEC_PER_SEC / new_ires);
-               adj = (((clknum % freq) * new_ires) / clknum);
-               if (adj > (new_ires / 1000))
-                       return (KERN_INVALID_VALUE);
+               cycles = rtc_export_speed(tscFreq);
+
                /*
-                * Record the new alarm resolution which will take effect
-                * on the next HZ aligned clock tick.
+                * Set min/max to actual.
+                * ACPI may update these later if speed-stepping is detected.
                 */
-               LOCK_RTC(s);
-               if ( freq != rtc_intr_freq ) {
-                   rtclock.new_ires = new_ires;
-                   new_clknum = clknum;
-               }
-               UNLOCK_RTC(s);
-               return (KERN_SUCCESS);
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-}
-
-/*
- * Set next alarm time for the clock device. This call
- * always resets the time to deliver an alarm for the
- * clock.
- */
-void
-sysclk_setalarm(
-       mach_timespec_t *alarm_time)
-{
-       spl_t           s;
+               gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
+               gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
 
-       LOCK_RTC(s);
-       rtclock.alarm_time = *alarm_time;
-       RtcAlrm = &rtclock.alarm_time;
-       UNLOCK_RTC(s);
-}
-
-/*
- * Configure the calendar clock.
- */
-int
-calend_config(void)
-{
-       return bbc_config();
-}
-
-/*
- * Initialize calendar clock.
- */
-int
-calend_init(void)
-{
-       return (1);
-}
-
-/*
- * Get the current clock time.
- */
-kern_return_t
-calend_gettime(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       spl_t           s;
-
-       LOCK_RTC(s);
-       if (!rtclock.calend_is_set) {
-               UNLOCK_RTC(s);
-               return (KERN_FAILURE);
+               rtc_timer_init();
+               clock_timebase_init();
+               ml_init_lock_timeout();
+               ml_init_delay_spin_threshold(10);       /* NOTE(review): units of the literal 10 are not visible here -- confirm */
        }
 
-       (void) sysclk_gettime_internal(cur_time);
-       ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset);
-       UNLOCK_RTC(s);
-
-       return (KERN_SUCCESS);
-}
-
-void
-clock_get_calendar_microtime(
-       uint32_t                        *secs,
-       uint32_t                        *microsecs)
-{
-       mach_timespec_t         now;
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->rtc_config();        /* outside the master-cpu branch: done on every cpu that calls this */
+       rtc_timer_start();
 
-       calend_gettime(&now);
-
-       *secs = now.tv_sec;
-       *microsecs = now.tv_nsec / NSEC_PER_USEC;
+       return 1;       /* the return value is always 1 */
 }
 
-void
-clock_get_calendar_nanotime(
-       uint32_t                        *secs,
-       uint32_t                        *nanosecs)
-{
-       mach_timespec_t         now;
-
-       calend_gettime(&now);
-
-       *secs = now.tv_sec;
-       *nanosecs = now.tv_nsec;
-}
+// utility routine
+// Derives the TSC-to-nanosecond scale and shift from a cycles-per-second value, then (re)initializes nanotime.
 
-void
-clock_set_calendar_microtime(
-       uint32_t                        secs,
-       uint32_t                        microsecs)
+static void
+rtc_set_timescale(uint64_t cycles)
 {
-       mach_timespec_t         new_time, curr_time;
-       spl_t           s;
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint32_t    shift = 0;  /* counts how many times cycles is doubled below */
 
-       LOCK_RTC(s);
-       (void) sysclk_gettime_internal(&curr_time);
-       rtclock.calend_offset.tv_sec = new_time.tv_sec = secs;
-       rtclock.calend_offset.tv_nsec = new_time.tv_nsec = microsecs * NSEC_PER_USEC;
-       SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-       rtclock.calend_is_set = TRUE;
-       UNLOCK_RTC(s);
+       /* the "scale" factor will overflow unless cycles>SLOW_TSC_THRESHOLD */
 
-       (void) bbc_settime(&new_time);
-
-       host_notify_calendar_change();
-}
-
-/*
- * Get clock device attributes.
- */
-kern_return_t
-calend_getattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
-{
-       spl_t   s;
-
-       if (*count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:        /* >0 res */
-#if    (NCPUS == 1)
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = 1000;
-               UNLOCK_RTC(s);
-               break;
-#else  /* (NCPUS == 1) */
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = rtclock.intr_nsec;
-               UNLOCK_RTC(s);
-               break;
-#endif /* (NCPUS == 1) */
-
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-       case CLOCK_ALARM_MINRES:
-       case CLOCK_ALARM_MAXRES:
-               *(clock_res_t *) attr = 0;
-               break;
-
-       default:
-               return (KERN_INVALID_VALUE);
+       while (cycles <= SLOW_TSC_THRESHOLD) {  /* NOTE(review): cannot terminate if cycles == 0; the visible caller asserts tscFreq first */
+               shift++;
+               cycles <<= 1;
        }
-       return (KERN_SUCCESS);
-}
-
-#define tickadj                (40*NSEC_PER_USEC)      /* "standard" skew, ns / tick */
-#define        bigadj          (NSEC_PER_SEC)          /* use 10x skew above bigadj ns */
-
-uint32_t
-clock_set_calendar_adjtime(
-       int32_t                         *secs,
-       int32_t                         *microsecs)
-{
-       int64_t                 total, ototal;
-       uint32_t                interval = 0;
-       spl_t                   s;
 
-       total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC;
+       rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);      /* NSEC_PER_SEC * 2^32 / (doubled) cycles, kept as 32 bits */
 
-       LOCK_RTC(s);
-       ototal = rtclock.calend_adjtotal;
-
-       if (total != 0) {
-               int32_t         delta = tickadj;
-
-               if (total > 0) {
-                       if (total > bigadj)
-                               delta *= 10;
-                       if (delta > total)
-                               delta = total;
-               }
-               else {
-                       if (total < -bigadj)
-                               delta *= 10;
-                       delta = -delta;
-                       if (delta < total)
-                               delta = total;
-               }
+       rntp->shift = shift;    /* recorded alongside scale; non-zero is reported as a slow TSC by rtc_export_speed() */
 
-               rtclock.calend_adjtotal = total;
-               rtclock.calend_adjdelta = delta;
-
-               interval = (NSEC_PER_SEC / HZ);
-       }
-       else
-               rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0;
-
-       UNLOCK_RTC(s);
-
-       if (ototal == 0)
-               *secs = *microsecs = 0;
-       else {
-               *secs = ototal / NSEC_PER_SEC;
-               *microsecs = ototal % NSEC_PER_SEC;
+       /*
+        * On some platforms, the TSC is not reset at warm boot. But the
+        * rebase time must be relative to the current boot so we can't use
+        * mach_absolute_time(). Instead, we convert the TSC delta since boot
+        * to nanoseconds.
+        */
+       if (tsc_rebase_abs_time == 0) { /* written only while still zero; an existing non-zero value is kept */
+               tsc_rebase_abs_time = _rtc_tsc_to_nanoseconds(
+                       rdtsc64() - tsc_at_boot, rntp);
        }
 
-       return (interval);
+       rtc_nanotime_init(0);   /* initialize nanotime with a base argument of 0 */
 }
 
-uint32_t
-clock_adjust_calendar(void)
+static uint64_t
+rtc_export_speed(uint64_t cyc_per_sec) /* returns cyc_per_sec rounded as described below */
 {
-       uint32_t                interval = 0;
-       int32_t                 delta;
-       spl_t                   s;
-
-       LOCK_RTC(s);
-       delta = rtclock.calend_adjdelta;
-       ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta);
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint64_t        cycles;
 
-       rtclock.calend_adjtotal -= delta;
-
-       if (delta > 0) {
-               if (delta > rtclock.calend_adjtotal)
-                       rtclock.calend_adjdelta = rtclock.calend_adjtotal;
-       }
-       else
-       if (delta < 0) {
-               if (delta < rtclock.calend_adjtotal)
-                       rtclock.calend_adjdelta = rtclock.calend_adjtotal;
+       if (rntp->shift != 0) { /* shift is non-zero only if rtc_set_timescale() had to double the cycle count */
+               printf("Slow TSC, rtc_nanotime.shift == %d\n", rntp->shift);
        }
 
-       if (rtclock.calend_adjdelta != 0)
-               interval = (NSEC_PER_SEC / HZ);
+       /* Round to the nearest multiple of UI_CPUFREQ_ROUNDING_FACTOR: */
+       cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR / 2))
+           / UI_CPUFREQ_ROUNDING_FACTOR)
+           * UI_CPUFREQ_ROUNDING_FACTOR;
 
-       UNLOCK_RTC(s);
+       /*
+        * Set current measured speed.
+        */
+       if (cycles >= 0x100000000ULL) { /* does not fit in 32 bits: store the saturated value instead */
+               gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL;
+       } else {
+               gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles;
+       }
+       gPEClockFrequencyInfo.cpu_frequency_hz = cycles;        /* assigned without the clamp applied above */
 
-       return (interval);
+       kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, cyc_per_sec);
+       return cycles;  /* the rounded value, not the raw input */
 }
 
 void
-clock_initialize_calendar(void)
+clock_get_system_microtime(
+       clock_sec_t                     *secs,          /* OUT */
+       clock_usec_t            *microsecs)     /* OUT */
 {
-       mach_timespec_t bbc_time, curr_time;
-       spl_t           s;
-
-       if (bbc_gettime(&bbc_time) != KERN_SUCCESS)
-               return;
-
-       LOCK_RTC(s);
-       if (!rtclock.calend_is_set) {
-               (void) sysclk_gettime_internal(&curr_time);
-               rtclock.calend_offset = bbc_time;
-               SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-               rtclock.calend_is_set = TRUE;
-       }
-       UNLOCK_RTC(s);
+       uint64_t        now = rtc_nanotime_read();      /* same reading mach_absolute_time() returns */
 
-       host_notify_calendar_change();
+       _absolutetime_to_microtime(now, secs, microsecs);       /* helper fills *secs and *microsecs from now */
 }
 
 void
-clock_timebase_info(
-       mach_timebase_info_t    info)
+clock_get_system_nanotime(
+       clock_sec_t                     *secs,          /* OUT */
+       clock_nsec_t            *nanosecs)      /* OUT */
 {
-       spl_t   s;
+       uint64_t        now = rtc_nanotime_read();      /* same reading mach_absolute_time() returns */
 
-       LOCK_RTC(s);
-       if (rtclock.timebase_const.denom == 0xFFFFFFFF) {
-               info->numer = info->denom = rtc_quant_scale;
-       } else {
-               info->numer = info->denom = 1;
-       }
-       UNLOCK_RTC(s);
-}      
+       _absolutetime_to_nanotime(now, secs, nanosecs); /* helper fills *secs and *nanosecs from now */
+}
 
 void
-clock_set_timer_deadline(
-       uint64_t                        deadline)
+clock_gettimeofday_set_commpage(uint64_t abstime, uint64_t sec, uint64_t frac, uint64_t scale, uint64_t tick_per_sec)
 {
-       spl_t                   s;
-
-       LOCK_RTC(s);
-       rtclock.timer_deadline = deadline;
-       rtclock.timer_is_set = TRUE;
-       UNLOCK_RTC(s);
+       commpage_set_timestamp(abstime, sec, frac, scale, tick_per_sec);        /* pure pass-through: all five arguments, same order */
 }
 
 void
-clock_set_timer_func(
-       clock_timer_func_t              func)
+clock_timebase_info(
+       mach_timebase_info_t    info)   /* OUT: numer and denom are written */
 {
-       spl_t           s;
-
-       LOCK_RTC(s);
-       if (rtclock.timer_expire == NULL)
-               rtclock.timer_expire = func;
-       UNLOCK_RTC(s);
+       info->numer = info->denom =  1; /* 1:1 ratio, matching the identity conversion in absolutetime_to_nanoseconds() */
 }
 
-\f
-
 /*
- * Load the count register and start the clock.
+ * Real-time clock device interrupt.
  */
-#define RTCLOCK_RESET()        {                                       \
-       outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE);    \
-       outb(PITCTR0_PORT, (clks_per_int & 0xff));              \
-       outb(PITCTR0_PORT, (clks_per_int >> 8));                \
-}
-
-/*
- * Reset the clock device. This causes the realtime clock
- * device to reload its mode and count value (frequency).
- * Note: the CPU should be calibrated
- * before starting the clock for the first time.
- */
-
 void
-rtclock_reset(void)
+rtclock_intr(
+       x86_saved_state_t       *tregs) /* saved register state; only cs and the instruction pointer are read */
 {
-       int             s;
+       uint64_t        rip;    /* instruction pointer taken from the saved state (rip or eip) */
+       boolean_t       user_mode = FALSE;      /* set TRUE below when the low two bits of cs are non-zero */
 
-#if    NCPUS > 1
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return;
-       }
-       mp_enable_preemption();
-#endif /* NCPUS > 1 */
-       LOCK_RTC(s);
-       RTCLOCK_RESET();
-       UNLOCK_RTC(s);
-}
+       assert(get_preemption_level() > 0);     /* preemption level must already be non-zero */
+       assert(!ml_get_interrupts_enabled());   /* and interrupts must be disabled */
 
-/*
- * Real-time clock device interrupt. Called only on the
- * master processor. Updates the clock time and upcalls
- * into the higher level clock code to deliver alarms.
- */
-int
-rtclock_intr(struct i386_interrupt_state *regs)
-{
-       uint64_t        abstime;
-       mach_timespec_t clock_time;
-       int             i;
-       spl_t           s;
-       boolean_t       usermode;
+       if (is_saved_state64(tregs) == TRUE) {  /* 64-bit saved state: read isf.cs and isf.rip */
+               x86_saved_state64_t     *regs;
 
-       /*
-        * Update clock time. Do the update so that the macro
-        * MTS_TO_TS() for reading the mapped time works (e.g.
-        * update in order: mtv_csec, mtv_time.tv_nsec, mtv_time.tv_sec).
-        */      
-       LOCK_RTC(s);
-        abstime = rdtsctime_to_nanoseconds();          // get the time as of the TSC
-        clock_time = nanos_to_timespec(abstime);       // turn it into a timespec
-        rtclock.time.tv_nsec = clock_time.tv_nsec;
-        rtclock.time.tv_sec = clock_time.tv_sec;
-        rtclock.abstime = abstime;
-        
-       /* note time now up to date */
-       last_ival = 0;
+               regs = saved_state64(tregs);
 
-       /*
-        * On a HZ-tick boundary: return 0 and adjust the clock
-        * alarm resolution (if requested).  Otherwise return a
-        * non-zero value.
-        */
-       if ((i = --rtc_intr_count) == 0) {
-           if (rtclock.new_ires) {
-                       rtc_setvals(new_clknum, rtclock.new_ires);
-                       RTCLOCK_RESET();            /* lock clock register */
-                       rtclock.new_ires = 0;
-           }
-           rtc_intr_count = rtc_intr_hertz;
-           UNLOCK_RTC(s);
-           usermode = (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0);
-           hertz_tick(usermode, regs->eip);
-           LOCK_RTC(s);
+               if (regs->isf.cs & 0x03) {      /* low two bits of cs non-zero => treated as user mode */
+                       user_mode = TRUE;
+               }
+               rip = regs->isf.rip;
+       } else {        /* otherwise treat it as 32-bit state: read cs and eip */
+               x86_saved_state32_t     *regs;
+
+               regs = saved_state32(tregs);
+
+               if (regs->cs & 0x03) {  /* same low-two-bits test as the 64-bit branch */
+                       user_mode = TRUE;
+               }
+               rip = regs->eip;        /* eip is stored into the 64-bit rip local */
        }
 
-       if (    rtclock.timer_is_set                            &&
-                       rtclock.timer_deadline <= abstime               ) {
-               rtclock.timer_is_set = FALSE;
-               UNLOCK_RTC(s);
+       /* call the generic etimer */
+       timer_intr(user_mode, rip);
+}
 
-               (*rtclock.timer_expire)(abstime);
 
-               LOCK_RTC(s);
-       }
+/*
+ *     Request timer pop from the hardware
+ */
 
-       /*
-        * Perform alarm clock processing if needed. The time
-        * passed up is incremented by a half-interrupt tick
-        * to trigger alarms closest to their desired times.
-        * The clock_alarm_intr() routine calls sysclk_setalrm()
-        * before returning if later alarms are pending.
-        */
+uint64_t
+setPop(uint64_t time)  /* time: requested deadline; returns pop - now (see the return below) */
+{
+       uint64_t        now;    /* current nanotime, or 0 in the clear-timer case */
+       uint64_t        pop;    /* value returned by rtc_timer->rtc_set() */
 
-       if (RtcAlrm && (RtcAlrm->tv_sec < RtcTime->tv_sec ||
-                       (RtcAlrm->tv_sec == RtcTime->tv_sec &&
-                        RtcDelt >= RtcAlrm->tv_nsec - RtcTime->tv_nsec))) {
-               clock_time.tv_sec = 0;
-               clock_time.tv_nsec = RtcDelt;
-               ADD_MACH_TIMESPEC (&clock_time, RtcTime);
-               RtcAlrm = 0;
-               UNLOCK_RTC(s);
-               /*
-                * Call clock_alarm_intr() without RTC-lock.
-                * The lock ordering is always CLOCK-lock
-                * before RTC-lock.
-                */
-               clock_alarm_intr(SYSTEM_CLOCK, &clock_time);
-               LOCK_RTC(s);
+       /* 0 and EndOfAllTime are special-cases for "clear the timer" */
+       if (time == 0 || time == EndOfAllTime) {
+               time = EndOfAllTime;    /* normalize both special values to EndOfAllTime before recording */
+               now = 0;
+               pop = rtc_timer->rtc_set(0, 0); /* both arguments zero in the clear-timer case */
+       } else {
+               now = rtc_nanotime_read();      /* The time in nanoseconds */
+               pop = rtc_timer->rtc_set(time, now);
        }
 
-       UNLOCK_RTC(s);
-       return (i);
-}
+       /* Record requested and actual deadlines set */
+       x86_lcpu()->rtcDeadline = time;
+       x86_lcpu()->rtcPop      = pop;
 
-void
-clock_get_uptime(
-       uint64_t                *result)
-{
-        *result = rdtsctime_to_nanoseconds();
+       return pop - now;       /* in the clear-timer case now is 0, so this is pop itself */
 }
 
 uint64_t
 mach_absolute_time(void)
 {
-        return rdtsctime_to_nanoseconds();
+       return rtc_nanotime_read();     /* same source that mach_approximate_time() returns in this file */
 }
 
-void
-clock_interval_to_deadline(
-       uint32_t                interval,
-       uint32_t                scale_factor,
-       uint64_t                *result)
+uint64_t
+mach_approximate_time(void)    /* here implemented identically to mach_absolute_time() */
 {
-       uint64_t                abstime;
-
-       clock_get_uptime(result);
-
-       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
-
-       *result += abstime;
+       return rtc_nanotime_read();     /* no cheaper approximation is used: this is the full nanotime read */
 }
 
 void
 clock_interval_to_absolutetime_interval(
-       uint32_t                interval,
-       uint32_t                scale_factor,
-       uint64_t                *result)
+       uint32_t                interval,       /* count of units */
+       uint32_t                scale_factor,   /* multiplier applied to interval */
+       uint64_t                *result)        /* OUT: interval * scale_factor, computed in 64 bits */
 {
        *result = (uint64_t)interval * scale_factor;
 }
 
 void
-clock_absolutetime_interval_to_deadline(
-       uint64_t                abstime,
-       uint64_t                *result)
+absolutetime_to_microtime(
+       uint64_t                        abstime,
+       clock_sec_t                     *secs,          /* OUT */
+       clock_usec_t            *microsecs)     /* OUT */
 {
-       clock_get_uptime(result);
+       _absolutetime_to_microtime(abstime, secs, microsecs);   /* wrapper: forwards to the underscore-prefixed helper */
+}
 
-       *result += abstime;
+void
+nanotime_to_absolutetime(
+       clock_sec_t                     secs,
+       clock_nsec_t            nanosecs,
+       uint64_t                        *result)        /* OUT */
+{
+       *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs;   /* seconds widened to 64 bits, scaled to ns, plus the ns part */
 }
 
 void
 absolutetime_to_nanoseconds(
-       uint64_t                abstime,
-       uint64_t                *result)
+       uint64_t                abstime,
+       uint64_t                *result)        /* OUT: receives abstime unchanged (identity conversion) */
 {
        *result = abstime;
 }
 
 void
 nanoseconds_to_absolutetime(
-       uint64_t                nanoseconds,
-       uint64_t                *result)
+       uint64_t                nanoseconds,
+       uint64_t                *result)        /* OUT: receives nanoseconds unchanged (identity conversion) */
 {
        *result = nanoseconds;
 }
 
-/*
- * Spin-loop delay primitives.
- */
-void
-delay_for_interval(
-       uint32_t                interval,
-       uint32_t                scale_factor)
-{
-       uint64_t                now, end;
-
-       clock_interval_to_deadline(interval, scale_factor, &end);
-
-       do {
-               cpu_pause();
-               now = mach_absolute_time();
-       } while (now < end);
-}
-
 void
-clock_delay_until(
-       uint64_t                deadline)
+machine_delay_until(
+       uint64_t interval,      /* accepted but unused in this implementation */
+       uint64_t                deadline)       /* absolute time to spin until */
 {
-       uint64_t                now;
-
-       do {
+       (void)interval; /* explicitly discarded; only deadline is consulted */
+       while (mach_absolute_time() < deadline) {       /* busy-wait; returns at once if the deadline has already passed */
                cpu_pause();
-               now = mach_absolute_time();
-       } while (now < deadline);
-}
-
-void
-delay(
-       int             usec)
-{
-       delay_for_interval((usec < 0)? -usec: usec, NSEC_PER_USEC);
+       }
 }