xnu-4570.71.2.tar.gz

[apple/xnu.git] / osfmk / i386 / rtclock.c
diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c

index 8fe8ff88bc8208ea1768f007f6396788f7c9df0e..c8abc4b1eba22c88ca0d342f957f70fe99ec471f 100644 (file)
--- a/osfmk/i386/rtclock.c
+++ b/osfmk/i386/rtclock.c
@@ -1,14 +1,19 @@
  /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
   * This file contains Original Code and/or Modifications of Original Code
   * as defined in and that are subject to the Apple Public Source License
   * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
   * 
   * The Original Code and all software distributed under the License are
   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -18,7 +23,7 @@
   * Please see the License for the specific language governing rights and
   * limitations under the License.
   * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  /*
   * @OSF_COPYRIGHT@
@@ -34,8 +39,6 @@
   *                     the cpu clock counted by the timestamp MSR.
   */
  
-#include <platforms.h>
-#include <mach_kdb.h>
  
  #include <mach/mach_types.h>
  
@@ -47,824 +50,234 @@
  #include <kern/misc_protos.h>
  #include <kern/spl.h>
  #include <kern/assert.h>
+#include <kern/timer_queue.h>
  #include <mach/vm_prot.h>
  #include <vm/pmap.h>
  #include <vm/vm_kern.h>                /* for kernel_map */
-#include <i386/ipl.h>
-#include <i386/pit.h>
-#include <i386/pio.h>
-#include <i386/misc_protos.h>
-#include <i386/proc_reg.h>
+#include <architecture/i386/pio.h>
  #include <i386/machine_cpu.h>
-#include <i386/mp.h>
  #include <i386/cpuid.h>
-#include <i386/cpu_data.h>
  #include <i386/cpu_threads.h>
-#include <i386/perfmon.h>
+#include <i386/mp.h>
  #include <i386/machine_routines.h>
-#include <i386/AT386/bbclock_entries.h>
+#include <i386/pal_routines.h>
+#include <i386/proc_reg.h>
+#include <i386/misc_protos.h>
  #include <pexpert/pexpert.h>
  #include <machine/limits.h>
  #include <machine/commpage.h>
  #include <sys/kdebug.h>
-
-#define MAX(a,b) (((a)>(b))?(a):(b))
-#define MIN(a,b) (((a)>(b))?(b):(a))
-
-#define NSEC_PER_HZ                    (NSEC_PER_SEC / 100) /* nsec per tick */
-
+#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
  #define UI_CPUFREQ_ROUNDING_FACTOR     10000000
  
-int            sysclk_config(void);
-
-int            sysclk_init(void);
-
-kern_return_t  sysclk_gettime(
-       mach_timespec_t                 *cur_time);
-
-kern_return_t  sysclk_getattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  *count);
-
-void           sysclk_setalarm(
-       mach_timespec_t                 *alarm_time);
-
-/*
- * Lists of clock routines.
- */
-struct clock_ops  sysclk_ops = {
-       sysclk_config,                  sysclk_init,
-       sysclk_gettime,                 0,
-       sysclk_getattr,                 0,
-       sysclk_setalarm,
-};
-
-int            calend_config(void);
-
-int            calend_init(void);
-
-kern_return_t  calend_gettime(
-       mach_timespec_t                 *cur_time);
-
-kern_return_t  calend_getattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  *count);
-
-struct clock_ops calend_ops = {
-       calend_config,                  calend_init,
-       calend_gettime,                 0,
-       calend_getattr,                 0,
-       0,
-};
-
-/* local data declarations */
-
-static clock_timer_func_t      rtclock_timer_expire;
-
-static timer_call_data_t       rtclock_alarm_timer;
-
-static void    rtclock_alarm_expire(
-                       timer_call_param_t      p0,
-                       timer_call_param_t      p1);
-
-struct {
-       mach_timespec_t                 calend_offset;
-       boolean_t                       calend_is_set;
-
-       int64_t                         calend_adjtotal;
-       int32_t                         calend_adjdelta;
-
-       uint32_t                        boottime;
-
-        mach_timebase_info_data_t      timebase_const;
-
-       decl_simple_lock_data(,lock)    /* real-time clock device lock */
-} rtclock;
-
-boolean_t              rtc_initialized = FALSE;
-clock_res_t            rtc_intr_nsec = NSEC_PER_HZ;    /* interrupt res */
-uint64_t               rtc_cycle_count;        /* clocks in 1/20th second */
-uint64_t               rtc_cyc_per_sec;        /* processor cycles per sec */
-uint32_t               rtc_boot_frequency;     /* provided by 1st speed-step */
-uint32_t               rtc_quant_scale;        /* clock to nanos multiplier */
-uint32_t               rtc_quant_shift;        /* clock to nanos right shift */
-uint64_t               rtc_decrementer_min;
-
-static mach_timebase_info_data_t       rtc_lapic_scale; /* nsec to lapic count */
-
-/*
- *     Macros to lock/unlock real-time clock data.
- */
-#define RTC_INTRS_OFF(s)               \
-       (s) = splclock()
-
-#define RTC_INTRS_ON(s)                        \
-       splx(s)
-
-#define RTC_LOCK(s)                    \
-MACRO_BEGIN                            \
-       RTC_INTRS_OFF(s);               \
-       simple_lock(&rtclock.lock);     \
-MACRO_END
-
-#define RTC_UNLOCK(s)                  \
-MACRO_BEGIN                            \
-       simple_unlock(&rtclock.lock);   \
-       RTC_INTRS_ON(s);                \
-MACRO_END
-
-/*
- * i8254 control.  ** MONUMENT **
- *
- * The i8254 is a traditional PC device with some arbitrary characteristics.
- * Basically, it is a register that counts at a fixed rate and can be
- * programmed to generate an interrupt every N counts.  The count rate is
- * clknum counts per sec (see pit.h), historically 1193167=14.318MHz/12
- * but the more accurate value is 1193182=14.31818MHz/12. [14.31818 MHz being
- * the master crystal oscillator reference frequency since the very first PC.]
- * Various constants are computed based on this value, and we calculate
- * them at init time for execution efficiency.  To obtain sufficient
- * accuracy, some of the calculation are most easily done in floating
- * point and then converted to int.
- *
- */
-
-/*
- * Forward decl.
- */
-
-static uint64_t        rtc_set_cyc_per_sec(uint64_t cycles);
-uint64_t       rtc_nanotime_read(void);
-
-/*
- * create_mul_quant_GHZ
- *   create a constant used to multiply the TSC by to convert to nanoseconds.
- *   This is a 32 bit number and the TSC *MUST* have a frequency higher than
- *   1000Mhz for this routine to work.
- *
- * The theory here is that we know how many TSCs-per-sec the processor runs at.
- * Normally to convert this to nanoseconds you would multiply the current
- * timestamp by 1000000000 (a billion) then divide by TSCs-per-sec.
- * Unfortunatly the TSC is 64 bits which would leave us with 96 bit intermediate
- * results from the multiply that must be divided by.
- * Usually thats
- *   uint96 = tsc * numer
- *   nanos = uint96 / denom
- * Instead, we create this quant constant and it becomes the numerator,
- * the denominator can then be 0x100000000 which makes our division as simple as
- * forgetting the lower 32 bits of the result. We can also pass this number to
- * user space as the numer and pass 0xFFFFFFFF (RTC_FAST_DENOM) as the denom to
- * convert raw counts * to nanos. The difference is so small as to be
- * undetectable by anything.
- *
- * Unfortunatly we can not do this for sub GHZ processors. In this case, all
- * we do is pass the CPU speed in raw as the denom and we pass in 1000000000
- * as the numerator. No short cuts allowed
- */
-#define RTC_FAST_DENOM 0xFFFFFFFF
-inline static uint32_t
-create_mul_quant_GHZ(int shift, uint32_t quant)
-{
-       return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
-}
-/*
- * This routine takes a value of raw TSC ticks and applies the passed mul_quant
- * generated by create_mul_quant() This is our internal routine for creating
- * nanoseconds.
- * Since we don't really have uint96_t this routine basically does this....
- *   uint96_t intermediate = (*value) * scale
- *   return (intermediate >> 32)
- */
-inline static uint64_t
-fast_get_nano_from_abs(uint64_t value, int scale)
-{
-    asm ("     movl    %%edx,%%esi     \n\t"
-         "      mull   %%ecx           \n\t"
-         "      movl   %%edx,%%edi     \n\t"
-         "      movl   %%esi,%%eax     \n\t"
-         "      mull   %%ecx           \n\t"
-         "      xorl   %%ecx,%%ecx     \n\t"   
-         "      addl   %%edi,%%eax     \n\t"   
-         "      adcl   %%ecx,%%edx         "
-               : "+A" (value)
-               : "c" (scale)
-               : "%esi", "%edi");
-    return value;
-}
-
-/*
- * This routine basically does this...
- * ts.tv_sec = nanos / 1000000000;     create seconds
- * ts.tv_nsec = nanos % 1000000000;    create remainder nanos
- */
-inline static mach_timespec_t 
-nanos_to_timespec(uint64_t nanos)
-{
-       union {
-               mach_timespec_t ts;
-               uint64_t u64;
-       } ret;
-        ret.u64 = nanos;
-        asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC));
-        return ret.ts;
-}
-
-/*
- * The following two routines perform the 96 bit arithmetic we need to
- * convert generic absolute<->nanoseconds
- * The multiply routine takes a uint64_t and a uint32_t and returns the result
- * in a uint32_t[3] array.
- * The divide routine takes this uint32_t[3] array and divides it by a uint32_t
- * returning a uint64_t
- */
-inline static void
-longmul(uint64_t       *abstime, uint32_t multiplicand, uint32_t *result)
-{
-    asm volatile(
-        " pushl        %%ebx                   \n\t"   
-        " movl %%eax,%%ebx             \n\t"
-        " movl (%%eax),%%eax           \n\t"
-        " mull %%ecx                   \n\t"
-        " xchg %%eax,%%ebx             \n\t"
-        " pushl        %%edx                   \n\t"
-        " movl 4(%%eax),%%eax          \n\t"
-        " mull %%ecx                   \n\t"
-        " movl %2,%%ecx                \n\t"
-        " movl %%ebx,(%%ecx)           \n\t"
-        " popl %%ebx                   \n\t"
-        " addl %%ebx,%%eax             \n\t"
-        " popl %%ebx                   \n\t"
-        " movl %%eax,4(%%ecx)          \n\t"
-        " adcl $0,%%edx                \n\t"
-        " movl %%edx,8(%%ecx)  // and save it"
-        : : "a"(abstime), "c"(multiplicand), "m"(result));
-    
-}
-
-inline static uint64_t
-longdiv(uint32_t *numer, uint32_t denom)
-{
-    uint64_t   result;
-    asm volatile(
-        " pushl        %%ebx                   \n\t"
-        " movl %%eax,%%ebx             \n\t"
-        " movl 8(%%eax),%%edx          \n\t"
-        " movl 4(%%eax),%%eax          \n\t"
-        " divl %%ecx                   \n\t"
-        " xchg %%ebx,%%eax             \n\t"
-        " movl (%%eax),%%eax           \n\t"
-        " divl %%ecx                   \n\t"
-        " xchg %%ebx,%%edx             \n\t"
-        " popl %%ebx                   \n\t"
-        : "=A"(result) : "a"(numer),"c"(denom));
-    return result;
-}
-
-/*
- * Enable or disable timer 2.
- * Port 0x61 controls timer 2:
- *   bit 0 gates the clock,
- *   bit 1 gates output to speaker.
- */
-inline static void
-enable_PIT2(void)
-{
-    asm volatile(
-        " inb   $0x61,%%al      \n\t"
-        " and   $0xFC,%%al       \n\t"
-        " or    $1,%%al         \n\t"
-        " outb  %%al,$0x61      \n\t"
-        : : : "%al" );
-}
-
-inline static void
-disable_PIT2(void)
-{
-    asm volatile(
-        " inb   $0x61,%%al      \n\t"
-        " and   $0xFC,%%al      \n\t"
-        " outb  %%al,$0x61      \n\t"
-        : : : "%al" );
-}
+int            rtclock_init(void);
  
-inline static void
-set_PIT2(int value)
-{
-/*
- * First, tell the clock we are going to write 16 bits to the counter
- *   and enable one-shot mode (command 0xB8 to port 0x43)
- * Then write the two bytes into the PIT2 clock register (port 0x42).
- * Loop until the value is "realized" in the clock,
- * this happens on the next tick.
- */
-    asm volatile(
-        " movb  $0xB8,%%al      \n\t"
-        " outb %%al,$0x43      \n\t"
-        " movb %%dl,%%al       \n\t"
-        " outb %%al,$0x42      \n\t"
-        " movb %%dh,%%al       \n\t"
-        " outb %%al,$0x42      \n"
-"1:      inb   $0x42,%%al      \n\t" 
-        " inb  $0x42,%%al      \n\t"
-        " cmp  %%al,%%dh       \n\t"
-        " jne  1b"
-        : : "d"(value) : "%al");
-}
+uint64_t       tsc_rebase_abs_time = 0;
  
-inline static uint64_t
-get_PIT2(unsigned int *value)
-{
-    register uint64_t  result;
-/*
- * This routine first latches the time (command 0x80 to port 0x43),
- * then gets the time stamp so we know how long the read will take later.
- * Read (from port 0x42) and return the current value of the timer.
- */
-    asm volatile(
-        " xorl %%ecx,%%ecx     \n\t"
-        " movb $0x80,%%al      \n\t"
-        " outb %%al,$0x43      \n\t"
-        " rdtsc                        \n\t"
-        " pushl        %%eax           \n\t"
-        " inb  $0x42,%%al      \n\t"
-        " movb %%al,%%cl       \n\t"
-        " inb  $0x42,%%al      \n\t"
-        " movb %%al,%%ch       \n\t"
-        " popl %%eax   "
-        : "=A"(result), "=c"(*value));
-    return result;
-}
-
-/*
- * timeRDTSC()
- * This routine sets up PIT counter 2 to count down 1/20 of a second.
- * It pauses until the value is latched in the counter
- * and then reads the time stamp counter to return to the caller.
- */
-static uint64_t
-timeRDTSC(void)
-{
-    int                attempts = 0;
-    uint64_t   latchTime;
-    uint64_t   saveTime,intermediate;
-    unsigned int timerValue, lastValue;
-    boolean_t   int_enabled;
-    /*
-     * Table of correction factors to account for
-     *   - timer counter quantization errors, and
-     *   - undercounts 0..5
-     */
-#define        SAMPLE_CLKS_EXACT       (((double) CLKNUM) / 20.0)
-#define        SAMPLE_CLKS_INT         ((int) CLKNUM / 20)
-#define SAMPLE_NSECS           (2000000000LL)
-#define SAMPLE_MULTIPLIER      (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
-#define ROUND64(x)             ((uint64_t)((x) + 0.5))
-    uint64_t   scale[6] = {
-       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), 
-       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), 
-       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), 
-       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), 
-       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), 
-       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
-    };
-                            
-    int_enabled = ml_set_interrupts_enabled(FALSE);
-    
-restart:
-    if (attempts >= 2)
-       panic("timeRDTSC() calibation failed with %d attempts\n", attempts);
-    attempts++;
-    enable_PIT2();      // turn on PIT2
-    set_PIT2(0);       // reset timer 2 to be zero
-    latchTime = rdtsc64();     // get the time stamp to time 
-    latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
-    set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second
-    saveTime = rdtsc64();      // now time how long a 20th a second is...
-    get_PIT2(&lastValue);
-    get_PIT2(&lastValue);      // read twice, first value may be unreliable
-    do {
-        intermediate = get_PIT2(&timerValue);
-        if (timerValue > lastValue) {
-           printf("Hey we are going backwards! %u -> %u, restarting timing\n",
-                       timerValue,lastValue);
-           set_PIT2(0);
-           disable_PIT2();
-           goto restart;
-       }
-        lastValue = timerValue;
-    } while (timerValue > 5);
-    kprintf("timerValue   %d\n",timerValue);
-    kprintf("intermediate 0x%016llx\n",intermediate);
-    kprintf("saveTime     0x%016llx\n",saveTime);
-    
-    intermediate -= saveTime;          // raw count for about 1/20 second
-    intermediate *= scale[timerValue]; // rescale measured time spent
-    intermediate /= SAMPLE_NSECS;      // so its exactly 1/20 a second
-    intermediate += latchTime;         // add on our save fudge
-    
-    set_PIT2(0);                       // reset timer 2 to be zero
-    disable_PIT2();                    // turn off PIT 2
-
-    ml_set_interrupts_enabled(int_enabled);
-    return intermediate;
-}
+static void    rtc_set_timescale(uint64_t cycles);
+static uint64_t        rtc_export_speed(uint64_t cycles);
  
-static uint64_t
-tsc_to_nanoseconds(uint64_t abstime)
-{
-        uint32_t       numer;
-        uint32_t       denom;
-        uint32_t       intermediate[3];
-        
-        numer = rtclock.timebase_const.numer;
-        denom = rtclock.timebase_const.denom;
-        if (denom == RTC_FAST_DENOM) {
-            abstime = fast_get_nano_from_abs(abstime, numer);
-        } else {
-            longmul(&abstime, numer, intermediate);
-            abstime = longdiv(intermediate, denom);
-        }
-        return abstime;
-}
-
-inline static mach_timespec_t 
-tsc_to_timespec(void)
-{
-        uint64_t       currNanos;
-        currNanos = rtc_nanotime_read();
-        return nanos_to_timespec(currNanos);
-}
-
-#define        DECREMENTER_MAX         UINT_MAX
-static uint32_t
-deadline_to_decrementer(
-       uint64_t        deadline,
-       uint64_t        now)
-{
-       uint64_t        delta;
-
-       if (deadline <= now)
-               return rtc_decrementer_min;
-       else {
-               delta = deadline - now;
-               return MIN(MAX(rtc_decrementer_min,delta),DECREMENTER_MAX); 
-       }
-}
-
-static inline uint64_t
-lapic_time_countdown(uint32_t initial_count)
-{
-       boolean_t               state;
-       uint64_t                start_time;
-       uint64_t                stop_time;
-       lapic_timer_count_t     count;
-
-       state = ml_set_interrupts_enabled(FALSE);
-       lapic_set_timer(FALSE, one_shot, divide_by_1, initial_count);
-       start_time = rdtsc64();
-       do {
-               lapic_get_timer(NULL, NULL, NULL, &count);
-       } while (count > 0);
-       stop_time = rdtsc64();
-       ml_set_interrupts_enabled(state);
-
-       return tsc_to_nanoseconds(stop_time - start_time);
-}
-
-static void
-rtc_lapic_timer_calibrate(void)
+void
+rtc_timer_start(void)
  {
-       uint32_t        nsecs;
-       uint64_t        countdown;
-
-       if (!(cpuid_features() & CPUID_FEATURE_APIC))
-               return;
-
         /*
-        * Set the local apic timer counting down to zero without an interrupt.
-        * Use the timestamp to calculate how long this takes.
-        */ 
-       nsecs = (uint32_t) lapic_time_countdown(rtc_intr_nsec);
-
-       /*
-        * Compute a countdown ratio for a given time in nanoseconds.
-        * That is, countdown = time * numer / denom.
+        * Force a complete re-evaluation of timer deadlines.
          */
-       countdown = (uint64_t)rtc_intr_nsec * (uint64_t)rtc_intr_nsec / nsecs;
-
-       nsecs = (uint32_t) lapic_time_countdown((uint32_t) countdown);
-
-       rtc_lapic_scale.numer = countdown;
-       rtc_lapic_scale.denom = nsecs;
-
-       kprintf("rtc_lapic_timer_calibrate() scale: %d/%d\n",
-               (uint32_t) countdown, nsecs);
+       x86_lcpu()->rtcDeadline = EndOfAllTime;
+       timer_resync_deadlines();
  }
  
-static void
-rtc_lapic_set_timer(
-       uint32_t        interval)
+static inline uint32_t
+_absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs)
  {
-       uint64_t        count;
-
-       assert(rtc_lapic_scale.denom);
-
-       count = interval * (uint64_t) rtc_lapic_scale.numer;
-       count /= rtc_lapic_scale.denom;
-
-       lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count);
+       uint32_t remain;
+       *secs = abstime / (uint64_t)NSEC_PER_SEC;
+       remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC);
+       *microsecs = remain / NSEC_PER_USEC;
+       return remain;
  }
  
-static void
-rtc_lapic_start_ticking(void)
-{
-       uint64_t        abstime;
-       uint64_t        first_tick;
-       uint64_t        decr;
-
-       abstime = mach_absolute_time();
-       first_tick = abstime + NSEC_PER_HZ;
-       current_cpu_datap()->cpu_rtc_tick_deadline = first_tick;
-       decr = deadline_to_decrementer(first_tick, abstime);
-       rtc_lapic_set_timer(decr);
-}
-
-/*
- * Configure the real-time clock device. Return success (1)
- * or failure (0).
- */
-
-int
-sysclk_config(void)
+static inline void
+_absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs)
  {
-
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return(1);
-       }
-       mp_enable_preemption();
-
-       timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL);
-
-       simple_lock_init(&rtclock.lock, 0);
-
-       return (1);
+       *secs = abstime / (uint64_t)NSEC_PER_SEC;
+       *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC);
  }
  
-
  /*
   * Nanotime/mach_absolutime_time
   * -----------------------------
- * The timestamp counter (tsc) - which counts cpu clock cycles and can be read
- * efficient by the kernel and in userspace - is the reference for all timing.
- * However, the cpu clock rate is not only platform-dependent but can change
- * (speed-step) dynamically. Hence tsc is converted into nanoseconds which is
- * identical to mach_absolute_time. The conversion to tsc to nanoseconds is
- * encapsulated by nanotime.
+ * The timestamp counter (TSC) - which counts cpu clock cycles and can be read
+ * efficiently by the kernel and in userspace - is the reference for all timing.
+ * The cpu clock rate is platform-dependent and may stop or be reset when the
+ * processor is napped/slept.  As a result, nanotime is the software abstraction
+ * used to maintain a monotonic clock, adjusted from an outside reference as needed.
   *
   * The kernel maintains nanotime information recording:
- *     - the current ratio of tsc to nanoseconds
+ *     - the ratio of tsc to nanoseconds
   *       with this ratio expressed as a 32-bit scale and shift
   *       (power of 2 divider);
- *     - the tsc (step_tsc) and nanotime (step_ns) at which the current
- *       ratio (clock speed) began.
- * So a tsc value can be converted to nanotime by:
+ *     - { tsc_base, ns_base } pair of corresponding timestamps.
   *
- *     nanotime = (((tsc - step_tsc)*scale) >> shift) + step_ns
+ * The tuple {tsc_base, ns_base, scale, shift} is exported in the commpage 
+ * for the userspace nanotime routine to read.
   *
- * In general, (tsc - step_tsc) is a 64-bit quantity with the scaling
- * involving a 96-bit intermediate value. However, by saving the converted 
- * values at each tick (or at any intervening speed-step) - base_tsc and
- * base_ns - we can perform conversions relative to these and be assured that
- * (tsc - tick_tsc) is 32-bits. Hence:
- *
- *     fast_nanotime = (((tsc - base_tsc)*scale) >> shift) + base_ns  
- *
- * The tuple {base_tsc, base_ns, scale, shift} is exported in the commpage 
- * for the userspace nanotime routine to read. A duplicate check_tsc is
- * appended so that the consistency of the read can be verified. Note that
- * this scheme is essential for MP systems in which the commpage is updated
- * by the master cpu but may be read concurrently by other cpus.
- * 
+ * All of the routines which update the nanotime data are non-reentrant.  This must
+ * be guaranteed by the caller.
   */
  static inline void
-rtc_nanotime_set_commpage(rtc_nanotime_t *rntp)
+rtc_nanotime_set_commpage(pal_rtc_nanotime_t *rntp)
  {
-       commpage_nanotime_t     cp_nanotime;
-
-       /* Only the master cpu updates the commpage */
-       if (cpu_number() != master_cpu)
-               return;
-
-       cp_nanotime.nt_base_tsc = rntp->rnt_tsc;
-       cp_nanotime.nt_base_ns = rntp->rnt_nanos;
-       cp_nanotime.nt_scale = rntp->rnt_scale;
-       cp_nanotime.nt_shift = rntp->rnt_shift;
-
-       commpage_set_nanotime(&cp_nanotime);
+       commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift);
  }
  
-static void
-rtc_nanotime_init(void)
+/*
+ * rtc_nanotime_init:
+ *
+ * Intialize the nanotime info from the base time.
+ */
+static inline void
+_rtc_nanotime_init(pal_rtc_nanotime_t *rntp, uint64_t base)
  {
-       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-       rtc_nanotime_t  *master_rntp = &cpu_datap(master_cpu)->cpu_rtc_nanotime;
+       uint64_t        tsc = rdtsc64();
  
-       if (cpu_number() == master_cpu) {
-               rntp->rnt_tsc = rdtsc64();
-               rntp->rnt_nanos = tsc_to_nanoseconds(rntp->rnt_tsc);
-               rntp->rnt_scale = rtc_quant_scale;
-               rntp->rnt_shift = rtc_quant_shift;
-               rntp->rnt_step_tsc = 0ULL;
-               rntp->rnt_step_nanos = 0ULL;
-       } else {
-               /*
-                * Copy master processor's nanotime info.
-                * Loop required in case this changes while copying.
-                */
-               do {
-                       *rntp = *master_rntp;
-               } while (rntp->rnt_tsc != master_rntp->rnt_tsc);
-       }
+       _pal_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp);
  }
  
-static inline void
-_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t    tsc)
+void
+rtc_nanotime_init(uint64_t base)
  {
-       uint64_t        tsc_delta;
-       uint64_t        ns_delta;
-
-       tsc_delta = tsc - rntp->rnt_step_tsc;
-       ns_delta = tsc_to_nanoseconds(tsc_delta);
-       rntp->rnt_nanos = rntp->rnt_step_nanos + ns_delta;
-       rntp->rnt_tsc = tsc;
+       _rtc_nanotime_init(&pal_rtc_nanotime_info, base);
+       rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
  }
  
-static void
-rtc_nanotime_update(void)
+/*
+ * rtc_nanotime_init_commpage:
+ *
+ * Call back from the commpage initialization to
+ * cause the commpage data to be filled in once the
+ * commpages have been created.
+ */
+void
+rtc_nanotime_init_commpage(void)
  {
-       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+       spl_t                   s = splclock();
  
-       assert(get_preemption_level() > 0);
-       assert(!ml_get_interrupts_enabled());
-        
-       _rtc_nanotime_update(rntp, rdtsc64());
-       rtc_nanotime_set_commpage(rntp);
+       rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
+       splx(s);
  }
  
-static void
-rtc_nanotime_scale_update(void)
+/*
+ * rtc_nanotime_read:
+ *
+ * Returns the current nanotime value, accessable from any
+ * context.
+ */
+static inline uint64_t
+rtc_nanotime_read(void)
  {
-       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-       uint64_t        tsc = rdtsc64();
-
-       assert(!ml_get_interrupts_enabled());
-        
-       /*
-        * Update time based on past scale.
-        */
-       _rtc_nanotime_update(rntp, tsc);
-
-       /*
-        * Update scale and timestamp this update.
-        */
-       rntp->rnt_scale = rtc_quant_scale;
-       rntp->rnt_shift = rtc_quant_shift;
-       rntp->rnt_step_tsc = rntp->rnt_tsc;
-       rntp->rnt_step_nanos = rntp->rnt_nanos;
-
-       /* Export update to userland */
-       rtc_nanotime_set_commpage(rntp);
+       return  _rtc_nanotime_read(&pal_rtc_nanotime_info);
  }
  
-static uint64_t
-_rtc_nanotime_read(void)
+/*
+ * rtc_clock_napped:
+ *
+ * Invoked from power management when we exit from a low C-State (>= C4)
+ * and the TSC has stopped counting.  The nanotime data is updated according
+ * to the provided value which represents the new value for nanotime.
+ */
+void
+rtc_clock_napped(uint64_t base, uint64_t tsc_base)
  {
-       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-       uint64_t        rnt_tsc;
-       uint32_t        rnt_scale;
-       uint32_t        rnt_shift;
-       uint64_t        rnt_nanos;
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint64_t        oldnsecs;
+       uint64_t        newnsecs;
         uint64_t        tsc;
-       uint64_t        tsc_delta;
-
-       rnt_scale = rntp->rnt_scale;
-       if (rnt_scale == 0)
-               return 0ULL;
  
-       rnt_shift = rntp->rnt_shift;
-       rnt_nanos = rntp->rnt_nanos;
-       rnt_tsc = rntp->rnt_tsc;
+       assert(!ml_get_interrupts_enabled());
         tsc = rdtsc64();
-
-       tsc_delta = tsc - rnt_tsc;
-       if ((tsc_delta >> 32) != 0)
-               return rnt_nanos + tsc_to_nanoseconds(tsc_delta);
-
-       /* Let the compiler optimize(?): */
-       if (rnt_shift == 32)
-               return rnt_nanos + ((tsc_delta * rnt_scale) >> 32);     
-       else 
-               return rnt_nanos + ((tsc_delta * rnt_scale) >> rnt_shift);
-}
-
-uint64_t
-rtc_nanotime_read(void)
-{
-       uint64_t        result;
-       uint64_t        rnt_tsc;
-       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-
+       oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp);
+       newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp);
+       
         /*
-        * Use timestamp to ensure the uptime record isn't changed.
-        * This avoids disabling interrupts.
-        * And not this is a per-cpu structure hence no locking.
+        * Only update the base values if time using the new base values
+        * is later than the time using the old base values.
          */
-       do {
-               rnt_tsc = rntp->rnt_tsc;
-               result = _rtc_nanotime_read();
-       } while (rnt_tsc != rntp->rnt_tsc);
-
-       return result;
+       if (oldnsecs < newnsecs) {
+           _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+           rtc_nanotime_set_commpage(rntp);
+       }
  }
  
-
  /*
- * This function is called by the speed-step driver when a
- * change of cpu clock frequency is about to occur.
- * The scale is not changed until rtc_clock_stepped() is called.
- * Between these times there is an uncertainty is exactly when
- * the change takes effect. FIXME: by using another timing source
- * we could eliminate this error.
+ * Invoked from power management to correct the SFLM TSC entry drift problem:
+ * a small delta is added to the tsc_base.  This is equivalent to nudgin time
+ * backwards.  We require this to be on the order of a TSC quantum which won't
+ * cause callers of mach_absolute_time() to see time going backwards!
   */
  void
-rtc_clock_stepping(__unused uint32_t new_frequency,
-                  __unused uint32_t old_frequency)
+rtc_clock_adjust(uint64_t tsc_base_delta)
  {
-       boolean_t       istate;
+    pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info;
  
-       istate = ml_set_interrupts_enabled(FALSE);
-       rtc_nanotime_scale_update();
-       ml_set_interrupts_enabled(istate);
+    assert(!ml_get_interrupts_enabled());
+    assert(tsc_base_delta < 100ULL);   /* i.e. it's small */
+    _rtc_nanotime_adjust(tsc_base_delta, rntp);
+    rtc_nanotime_set_commpage(rntp);
  }
  
-/*
- * This function is called by the speed-step driver when a
- * change of cpu clock frequency has just occured. This change
- * is expressed as a ratio relative to the boot clock rate.
- */
  void
-rtc_clock_stepped(uint32_t new_frequency, uint32_t old_frequency)
+rtc_clock_stepping(__unused uint32_t new_frequency,
+                  __unused uint32_t old_frequency)
  {
-       boolean_t       istate;
+       panic("rtc_clock_stepping unsupported");
+}
  
-       istate = ml_set_interrupts_enabled(FALSE);
-       if (rtc_boot_frequency == 0) {
-               /*
-                * At the first ever stepping, old frequency is the real
-                * initial clock rate. This step and all others are based
-                * relative to this initial frequency at which the tsc
-                * calibration was made. Hence we must remember this base
-                * frequency as reference.
-                */
-               rtc_boot_frequency = old_frequency;
-       }
-       rtc_set_cyc_per_sec(rtc_cycle_count * new_frequency /
-                               rtc_boot_frequency);
-       rtc_nanotime_scale_update();
-       ml_set_interrupts_enabled(istate);
+void
+rtc_clock_stepped(__unused uint32_t new_frequency,
+                 __unused uint32_t old_frequency)
+{
+       panic("rtc_clock_stepped unsupported");
  }
  
  /*
- * rtc_sleep_wakeup() is called from acpi on awakening from a S3 sleep
+ * rtc_sleep_wakeup:
+ *
+ * Invoked from power management when we have awoken from a sleep (S3)
+ * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC
+ * has progressed.  The nanotime data is updated based on the passed-in value.
+ *
+ * The caller must guarantee non-reentrancy.
   */
  void
-rtc_sleep_wakeup(void)
+rtc_sleep_wakeup(
+       uint64_t                base)
  {
-       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-
-       boolean_t       istate;
-
-       istate = ml_set_interrupts_enabled(FALSE);
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->rtc_config();
  
         /*
          * Reset nanotime.
          * The timestamp counter will have been reset
          * but nanotime (uptime) marches onward.
-        * We assume that we're still at the former cpu frequency.
          */
-       rntp->rnt_tsc = rdtsc64();
-       rntp->rnt_step_tsc = 0ULL;
-       rntp->rnt_step_nanos = rntp->rnt_nanos;
-       rtc_nanotime_set_commpage(rntp);
-
-       /* Restart tick interrupts from the LAPIC timer */
-       rtc_lapic_start_ticking();
+       rtc_nanotime_init(base);
+}
  
-       ml_set_interrupts_enabled(istate);
+void
+rtc_decrementer_configure(void) {
+       rtc_timer->rtc_config();
+}
+/*
+ * rtclock_early_init() is called very early at boot to
+ * establish mach_absolute_time() and set it to zero.
+ */
+void
+rtclock_early_init(void)
+{
+       assert(tscFreq);
+       rtc_set_timescale(tscFreq);
  }
  
  /*
@@ -872,100 +285,85 @@ rtc_sleep_wakeup(void)
   * In addition, various variables used to support the clock are initialized.
   */
  int
-sysclk_init(void)
+rtclock_init(void)
  {
         uint64_t        cycles;
  
-       mp_disable_preemption();
+       assert(!ml_get_interrupts_enabled());
+
         if (cpu_number() == master_cpu) {
+
+               assert(tscFreq);
+
                 /*
-                * Perform calibration.
-                * The PIT is used as the reference to compute how many
-                * TCS counts (cpu clock cycles) occur per second.
+                * Adjust and set the exported cpu speed.
                  */
-               rtc_cycle_count = timeRDTSC();
-               cycles = rtc_set_cyc_per_sec(rtc_cycle_count);
+               cycles = rtc_export_speed(tscFreq);
  
                 /*
                  * Set min/max to actual.
                  * ACPI may update these later if speed-stepping is detected.
                  */
-               gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
-               gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
-               printf("[RTCLOCK] frequency %llu (%llu)\n",
-                      cycles, rtc_cyc_per_sec);
-
-               rtc_lapic_timer_calibrate();
-
-               /* Minimum interval is 1usec */
-               rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC,
-                                                               0ULL);
-               /* Point LAPIC interrupts to hardclock() */
-               lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
+               gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
+               gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
  
+               rtc_timer_init();
                 clock_timebase_init();
-               rtc_initialized = TRUE;
+               ml_init_lock_timeout();
+               ml_init_delay_spin_threshold(10);
         }
  
-       rtc_nanotime_init();
-
-       rtc_lapic_start_ticking();
-
-       mp_enable_preemption();
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->rtc_config();
+       rtc_timer_start();
  
         return (1);
  }
  
-/*
- * Get the clock device time. This routine is responsible
- * for converting the device's machine dependent time value
- * into a canonical mach_timespec_t value.
- */
-static kern_return_t
-sysclk_gettime_internal(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       *cur_time = tsc_to_timespec();
-       return (KERN_SUCCESS);
-}
+// utility routine 
+// Code to calculate how many processor cycles are in a second...
  
-kern_return_t
-sysclk_gettime(
-       mach_timespec_t *cur_time)      /* OUT */
+static void
+rtc_set_timescale(uint64_t cycles)
  {
-       return sysclk_gettime_internal(cur_time);
-}
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint32_t    shift = 0;
+    
+       /* the "scale" factor will overflow unless cycles>SLOW_TSC_THRESHOLD */
+    
+       while ( cycles <= SLOW_TSC_THRESHOLD) {
+               shift++;
+               cycles <<= 1;
+       }
+       
+       rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);
  
-void
-sysclk_gettime_interrupts_disabled(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       (void) sysclk_gettime_internal(cur_time);
-}
+       rntp->shift = shift;
  
-// utility routine 
-// Code to calculate how many processor cycles are in a second...
+       /*
+        * On some platforms, the TSC is not reset at warm boot. But the
+        * rebase time must be relative to the current boot so we can't use
+        * mach_absolute_time(). Instead, we convert the TSC delta since boot
+        * to nanoseconds.
+        */
+       if (tsc_rebase_abs_time == 0)
+               tsc_rebase_abs_time = _rtc_tsc_to_nanoseconds(
+                                               rdtsc64() - tsc_at_boot, rntp);
+
+       rtc_nanotime_init(0);
+}
  
  static uint64_t
-rtc_set_cyc_per_sec(uint64_t cycles)
+rtc_export_speed(uint64_t cyc_per_sec)
  {
+       pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint64_t        cycles;
  
-        if (cycles > (NSEC_PER_SEC/20)) {
-            // we can use just a "fast" multiply to get nanos
-           rtc_quant_shift = 32;
-            rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles);
-            rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20
-           rtclock.timebase_const.denom = RTC_FAST_DENOM;
-        } else {
-           rtc_quant_shift = 26;
-            rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles);
-            rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20
-            rtclock.timebase_const.denom = cycles;
-        }
-       rtc_cyc_per_sec = cycles*20;    // multiply it by 20 and we are done..
-                                       // BUT we also want to calculate...
-
-        cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
+       if (rntp->shift != 0 )
+               printf("Slow TSC, rtc_nanotime.shift == %d\n", rntp->shift);
+    
+       /* Round: */
+        cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
                         / UI_CPUFREQ_ROUNDING_FACTOR)
                                 * UI_CPUFREQ_ROUNDING_FACTOR;
  
@@ -979,313 +377,34 @@ rtc_set_cyc_per_sec(uint64_t cycles)
          }
          gPEClockFrequencyInfo.cpu_frequency_hz = cycles;
  
-       kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, rtc_cyc_per_sec);
+       kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, cyc_per_sec);
         return(cycles);
  }
  
  void
  clock_get_system_microtime(
-       uint32_t                        *secs,
-       uint32_t                        *microsecs)
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
  {
-       mach_timespec_t         now;
-
-       (void) sysclk_gettime_internal(&now);
+       uint64_t        now = rtc_nanotime_read();
  
-       *secs = now.tv_sec;
-       *microsecs = now.tv_nsec / NSEC_PER_USEC;
+       _absolutetime_to_microtime(now, secs, microsecs);
  }
  
  void
  clock_get_system_nanotime(
-       uint32_t                        *secs,
-       uint32_t                        *nanosecs)
-{
-       mach_timespec_t         now;
-
-       (void) sysclk_gettime_internal(&now);
-
-       *secs = now.tv_sec;
-       *nanosecs = now.tv_nsec;
-}
-
-/*
- * Get clock device attributes.
- */
-kern_return_t
-sysclk_getattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
-{
-       if (*count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:        /* >0 res */
-               *(clock_res_t *) attr = rtc_intr_nsec;
-               break;
-
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-       case CLOCK_ALARM_MAXRES:
-       case CLOCK_ALARM_MINRES:
-               *(clock_res_t *) attr = 0;
-               break;
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-       return (KERN_SUCCESS);
-}
-
-/*
- * Set next alarm time for the clock device. This call
- * always resets the time to deliver an alarm for the
- * clock.
- */
-void
-sysclk_setalarm(
-       mach_timespec_t *alarm_time)
-{
-       timer_call_enter(&rtclock_alarm_timer,
-                        (uint64_t) alarm_time->tv_sec * NSEC_PER_SEC
-                               + alarm_time->tv_nsec);
-}
-
-/*
- * Configure the calendar clock.
- */
-int
-calend_config(void)
-{
-       return bbc_config();
-}
-
-/*
- * Initialize calendar clock.
- */
-int
-calend_init(void)
-{
-       return (1);
-}
-
-/*
- * Get the current clock time.
- */
-kern_return_t
-calend_gettime(
-       mach_timespec_t *cur_time)      /* OUT */
-{
-       spl_t           s;
-
-       RTC_LOCK(s);
-       if (!rtclock.calend_is_set) {
-               RTC_UNLOCK(s);
-               return (KERN_FAILURE);
-       }
-
-       (void) sysclk_gettime_internal(cur_time);
-       ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset);
-       RTC_UNLOCK(s);
-
-       return (KERN_SUCCESS);
-}
-
-void
-clock_get_calendar_microtime(
-       uint32_t                        *secs,
-       uint32_t                        *microsecs)
-{
-       mach_timespec_t         now;
-
-       calend_gettime(&now);
-
-       *secs = now.tv_sec;
-       *microsecs = now.tv_nsec / NSEC_PER_USEC;
-}
-
-void
-clock_get_calendar_nanotime(
-       uint32_t                        *secs,
-       uint32_t                        *nanosecs)
-{
-       mach_timespec_t         now;
-
-       calend_gettime(&now);
-
-       *secs = now.tv_sec;
-       *nanosecs = now.tv_nsec;
-}
-
-void
-clock_set_calendar_microtime(
-       uint32_t                        secs,
-       uint32_t                        microsecs)
-{
-       mach_timespec_t         new_time, curr_time;
-       uint32_t                        old_offset;
-       spl_t           s;
-
-       new_time.tv_sec = secs;
-       new_time.tv_nsec = microsecs * NSEC_PER_USEC;
-
-       RTC_LOCK(s);
-       old_offset = rtclock.calend_offset.tv_sec;
-       (void) sysclk_gettime_internal(&curr_time);
-       rtclock.calend_offset = new_time;
-       SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-       rtclock.boottime += rtclock.calend_offset.tv_sec - old_offset;
-       rtclock.calend_is_set = TRUE;
-       RTC_UNLOCK(s);
-
-       (void) bbc_settime(&new_time);
-
-       host_notify_calendar_change();
-}
-
-/*
- * Get clock device attributes.
- */
-kern_return_t
-calend_getattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* OUT */
-       mach_msg_type_number_t  *count)         /* IN/OUT */
+       clock_sec_t                     *secs,
+       clock_nsec_t            *nanosecs)
  {
-       if (*count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:        /* >0 res */
-               *(clock_res_t *) attr = rtc_intr_nsec;
-               break;
-
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-       case CLOCK_ALARM_MINRES:
-       case CLOCK_ALARM_MAXRES:
-               *(clock_res_t *) attr = 0;
-               break;
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-       return (KERN_SUCCESS);
-}
-
-#define tickadj                (40*NSEC_PER_USEC)      /* "standard" skew, ns / tick */
-#define        bigadj          (NSEC_PER_SEC)          /* use 10x skew above bigadj ns */
+       uint64_t        now = rtc_nanotime_read();
  
-uint32_t
-clock_set_calendar_adjtime(
-       int32_t                         *secs,
-       int32_t                         *microsecs)
-{
-       int64_t                 total, ototal;
-       uint32_t                interval = 0;
-       spl_t                   s;
-
-       total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC;
-
-       RTC_LOCK(s);
-       ototal = rtclock.calend_adjtotal;
-
-       if (total != 0) {
-               int32_t         delta = tickadj;
-
-               if (total > 0) {
-                       if (total > bigadj)
-                               delta *= 10;
-                       if (delta > total)
-                               delta = total;
-               }
-               else {
-                       if (total < -bigadj)
-                               delta *= 10;
-                       delta = -delta;
-                       if (delta < total)
-                               delta = total;
-               }
-
-               rtclock.calend_adjtotal = total;
-               rtclock.calend_adjdelta = delta;
-
-               interval = NSEC_PER_HZ;
-       }
-       else
-               rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0;
-
-       RTC_UNLOCK(s);
-
-       if (ototal == 0)
-               *secs = *microsecs = 0;
-       else {
-               *secs = ototal / NSEC_PER_SEC;
-               *microsecs = ototal % NSEC_PER_SEC;
-       }
-
-       return (interval);
-}
-
-uint32_t
-clock_adjust_calendar(void)
-{
-       uint32_t                interval = 0;
-       int32_t                 delta;
-       spl_t                   s;
-
-       RTC_LOCK(s);
-       delta = rtclock.calend_adjdelta;
-       ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta);
-
-       rtclock.calend_adjtotal -= delta;
-
-       if (delta > 0) {
-               if (delta > rtclock.calend_adjtotal)
-                       rtclock.calend_adjdelta = rtclock.calend_adjtotal;
-       }
-       else
-       if (delta < 0) {
-               if (delta < rtclock.calend_adjtotal)
-                       rtclock.calend_adjdelta = rtclock.calend_adjtotal;
-       }
-
-       if (rtclock.calend_adjdelta != 0)
-               interval = NSEC_PER_HZ;
-
-       RTC_UNLOCK(s);
-
-       return (interval);
+       _absolutetime_to_nanotime(now, secs, nanosecs);
  }
  
  void
-clock_initialize_calendar(void)
+clock_gettimeofday_set_commpage(uint64_t abstime, uint64_t sec, uint64_t frac, uint64_t scale, uint64_t tick_per_sec)
  {
-       mach_timespec_t bbc_time, curr_time;
-       spl_t           s;
-
-       if (bbc_gettime(&bbc_time) != KERN_SUCCESS)
-               return;
-
-       RTC_LOCK(s);
-       if (rtclock.boottime == 0)
-               rtclock.boottime = bbc_time.tv_sec;
-       (void) sysclk_gettime_internal(&curr_time);
-       rtclock.calend_offset = bbc_time;
-       SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-       rtclock.calend_is_set = TRUE;
-       RTC_UNLOCK(s);
-
-       host_notify_calendar_change();
-}
-
-void
-clock_get_boottime_nanotime(
-       uint32_t                        *secs,
-       uint32_t                        *nanosecs)
-{
-       *secs = rtclock.boottime;
-       *nanosecs = 0;
+       commpage_set_timestamp(abstime, sec, frac, scale, tick_per_sec);
  }
  
  void
@@ -1295,182 +414,106 @@ clock_timebase_info(
         info->numer = info->denom =  1;
  }      
  
-void
-clock_set_timer_deadline(
-       uint64_t                        deadline)
-{
-       spl_t           s;
-       cpu_data_t      *pp = current_cpu_datap();
-       rtclock_timer_t *mytimer = &pp->cpu_rtc_timer;
-       uint64_t        abstime;
-       uint64_t        decr;
-
-       assert(get_preemption_level() > 0);
-       assert(rtclock_timer_expire);
-
-       RTC_INTRS_OFF(s);
-       mytimer->deadline = deadline;
-       mytimer->is_set = TRUE;
-       if (!mytimer->has_expired) {
-               abstime = mach_absolute_time();
-               if (mytimer->deadline < pp->cpu_rtc_tick_deadline) {
-                       decr = deadline_to_decrementer(mytimer->deadline,
-                                                      abstime);
-                       rtc_lapic_set_timer(decr);
-                       pp->cpu_rtc_intr_deadline = mytimer->deadline;
-                       KERNEL_DEBUG_CONSTANT(
-                               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) |
-                                       DBG_FUNC_NONE, decr, 2, 0, 0, 0);
-               }
-       }
-       RTC_INTRS_ON(s);
-}
-
-void
-clock_set_timer_func(
-       clock_timer_func_t              func)
-{
-       if (rtclock_timer_expire == NULL)
-               rtclock_timer_expire = func;
-}
-
  /*
   * Real-time clock device interrupt.
   */
  void
-rtclock_intr(struct i386_interrupt_state *regs)
+rtclock_intr(
+       x86_saved_state_t       *tregs)
  {
-       uint64_t        abstime;
-       uint32_t        latency;
-       uint64_t        decr;
-       uint64_t        decr_tick;
-       uint64_t        decr_timer;
-       cpu_data_t      *pp = current_cpu_datap();
-       rtclock_timer_t *mytimer = &pp->cpu_rtc_timer;
+        uint64_t       rip;
+       boolean_t       user_mode = FALSE;
  
         assert(get_preemption_level() > 0);
         assert(!ml_get_interrupts_enabled());
  
-        abstime = _rtc_nanotime_read();
-       latency = (uint32_t) abstime - pp->cpu_rtc_intr_deadline;
-       if (pp->cpu_rtc_tick_deadline <= abstime) {
-               rtc_nanotime_update();
-               clock_deadline_for_periodic_event(
-                       NSEC_PER_HZ, abstime, &pp->cpu_rtc_tick_deadline);
-               hertz_tick(
-#if STAT_TIME
-                          NSEC_PER_HZ,
-#endif
-                          (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0),
-                          regs->eip);
-       }
+       if (is_saved_state64(tregs) == TRUE) {
+               x86_saved_state64_t     *regs;
+                 
+               regs = saved_state64(tregs);
  
-       abstime = _rtc_nanotime_read();
-       if (mytimer->is_set && mytimer->deadline <= abstime) {
-               mytimer->has_expired = TRUE;
-               mytimer->is_set = FALSE;
-               (*rtclock_timer_expire)(abstime);
-               assert(!ml_get_interrupts_enabled());
-               mytimer->has_expired = FALSE;
-       }
+               if (regs->isf.cs & 0x03)
+                       user_mode = TRUE;
+               rip = regs->isf.rip;
+       } else {
+               x86_saved_state32_t     *regs;
  
-       /* Log the interrupt service latency (-ve value expected by tool) */
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE,
-               -latency, (uint32_t)regs->eip, 0, 0, 0);
+               regs = saved_state32(tregs);
  
-       abstime = _rtc_nanotime_read();
-       decr_tick = deadline_to_decrementer(pp->cpu_rtc_tick_deadline, abstime);
-       decr_timer = (mytimer->is_set) ?
-                       deadline_to_decrementer(mytimer->deadline, abstime) :
-                       DECREMENTER_MAX;
-       decr = MIN(decr_tick, decr_timer);
-       pp->cpu_rtc_intr_deadline = abstime + decr;
+               if (regs->cs & 0x03)
+                       user_mode = TRUE;
+               rip = regs->eip;
+       }
  
-       rtc_lapic_set_timer(decr);
+       /* call the generic etimer */
+       timer_intr(user_mode, rip);
+}
  
-       /* Log the new decrementer value */
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE,
-               decr, 3, 0, 0, 0);
  
-}
+/*
+ *     Request timer pop from the hardware 
+ */
  
-static void
-rtclock_alarm_expire(
-       __unused timer_call_param_t     p0,
-       __unused timer_call_param_t     p1)
+uint64_t
+setPop(uint64_t time)
  {
-       mach_timespec_t clock_time;
+       uint64_t        now;
+       uint64_t        pop;
  
-       (void) sysclk_gettime_internal(&clock_time);
+       /* 0 and EndOfAllTime are special-cases for "clear the timer" */
+       if (time == 0 || time == EndOfAllTime ) {
+               time = EndOfAllTime;
+               now = 0;
+               pop = rtc_timer->rtc_set(0, 0);
+       } else {
+               now = rtc_nanotime_read();      /* The time in nanoseconds */
+               pop = rtc_timer->rtc_set(time, now);
+       }
  
-       clock_alarm_intr(SYSTEM_CLOCK, &clock_time);
-}
+       /* Record requested and actual deadlines set */
+       x86_lcpu()->rtcDeadline = time;
+       x86_lcpu()->rtcPop      = pop;
  
-void
-clock_get_uptime(
-       uint64_t                *result)
-{
-        *result = rtc_nanotime_read();
+       return pop - now;
  }
  
  uint64_t
  mach_absolute_time(void)
  {
-        return rtc_nanotime_read();
+       return rtc_nanotime_read();
  }
  
-void
-absolutetime_to_microtime(
-       uint64_t                        abstime,
-       uint32_t                        *secs,
-       uint32_t                        *microsecs)
+uint64_t
+mach_approximate_time(void)
  {
-       uint32_t        remain;
-
-       asm volatile(
-                       "divl %3"
-                               : "=a" (*secs), "=d" (remain)
-                               : "A" (abstime), "r" (NSEC_PER_SEC));
-       asm volatile(
-                       "divl %3"
-                               : "=a" (*microsecs)
-                               : "0" (remain), "d" (0), "r" (NSEC_PER_USEC));
+       return rtc_nanotime_read();
  }
  
  void
-clock_interval_to_deadline(
+clock_interval_to_absolutetime_interval(
         uint32_t                interval,
         uint32_t                scale_factor,
         uint64_t                *result)
  {
-       uint64_t                abstime;
-
-       clock_get_uptime(result);
-
-       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
-
-       *result += abstime;
+       *result = (uint64_t)interval * scale_factor;
  }
  
  void
-clock_interval_to_absolutetime_interval(
-       uint32_t                interval,
-       uint32_t                scale_factor,
-       uint64_t                *result)
+absolutetime_to_microtime(
+       uint64_t                        abstime,
+       clock_sec_t                     *secs,
+       clock_usec_t            *microsecs)
  {
-       *result = (uint64_t)interval * scale_factor;
+       _absolutetime_to_microtime(abstime, secs, microsecs);
  }
  
  void
-clock_absolutetime_interval_to_deadline(
-       uint64_t                abstime,
-       uint64_t                *result)
+nanotime_to_absolutetime(
+       clock_sec_t                     secs,
+       clock_nsec_t            nanosecs,
+       uint64_t                        *result)
  {
-       clock_get_uptime(result);
-
-       *result += abstime;
+       *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs;
  }
  
  void
@@ -1491,12 +534,11 @@ nanoseconds_to_absolutetime(
  
  void
  machine_delay_until(
+       uint64_t interval,
         uint64_t                deadline)
  {
-       uint64_t                now;
-
-       do {
+       (void)interval;
+       while (mach_absolute_time() < deadline) {
                 cpu_pause();
-               now = mach_absolute_time();
-       } while (now < deadline);
+       } 
  }