]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/rtclock.c
xnu-792.24.17.tar.gz
[apple/xnu.git] / osfmk / i386 / rtclock.c
index 43ab1f3be79d6f54858c4cc65e902132a903415e..d6368afa65c575e47f256fad368820a87b650393 100644 (file)
@@ -1,24 +1,21 @@
 /*
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
  * 
  * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
 /*
  *     File:           i386/rtclock.c
  *     Purpose:        Routines for handling the machine dependent
 /*
  *     File:           i386/rtclock.c
  *     Purpose:        Routines for handling the machine dependent
- *                     real-time clock. This clock is generated by
- *                     the Intel 8254 Programmable Interval Timer.
+ *                     real-time clock. Historically, this clock is
+ *                     generated by the Intel 8254 Programmable Interval
+ *                     Timer, but local apic timers are now used for
+ *                     this purpose with the master time reference being
+ *                     the cpu clock counted by the timestamp MSR.
  */
 
  */
 
-#include <cpus.h>
 #include <platforms.h>
 #include <platforms.h>
-#include <mp_v1_1.h>
 #include <mach_kdb.h>
 #include <mach_kdb.h>
-#include <kern/cpu_number.h>
+
+#include <mach/mach_types.h>
+
 #include <kern/cpu_data.h>
 #include <kern/cpu_data.h>
+#include <kern/cpu_number.h>
 #include <kern/clock.h>
 #include <kern/clock.h>
+#include <kern/host_notify.h>
 #include <kern/macro_help.h>
 #include <kern/misc_protos.h>
 #include <kern/spl.h>
 #include <kern/macro_help.h>
 #include <kern/misc_protos.h>
 #include <kern/spl.h>
-#include <machine/mach_param.h>        /* HZ */
+#include <kern/assert.h>
 #include <mach/vm_prot.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>                /* for kernel_map */
 #include <mach/vm_prot.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>                /* for kernel_map */
 #include <i386/pit.h>
 #include <i386/pio.h>
 #include <i386/misc_protos.h>
 #include <i386/pit.h>
 #include <i386/pio.h>
 #include <i386/misc_protos.h>
-#include <i386/rtclock_entries.h>
-#include <i386/hardclock_entries.h>
+#include <i386/proc_reg.h>
+#include <i386/machine_cpu.h>
+#include <i386/mp.h>
+#include <i386/cpuid.h>
+#include <i386/cpu_data.h>
+#include <i386/cpu_threads.h>
+#include <i386/perfmon.h>
+#include <i386/machine_routines.h>
+#include <i386/AT386/bbclock_entries.h>
+#include <pexpert/pexpert.h>
+#include <machine/limits.h>
+#include <machine/commpage.h>
+#include <sys/kdebug.h>
+
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#define MIN(a,b) (((a)>(b))?(b):(a))
+
+#define NSEC_PER_HZ                    (NSEC_PER_SEC / 100) /* nsec per tick */
+
+#define UI_CPUFREQ_ROUNDING_FACTOR     10000000
 
 int            sysclk_config(void);
 
 
 int            sysclk_config(void);
 
@@ -66,37 +86,16 @@ kern_return_t       sysclk_getattr(
        clock_attr_t                    attr,
        mach_msg_type_number_t  *count);
 
        clock_attr_t                    attr,
        mach_msg_type_number_t  *count);
 
-kern_return_t  sysclk_setattr(
-       clock_flavor_t                  flavor,
-       clock_attr_t                    attr,
-       mach_msg_type_number_t  count);
-
 void           sysclk_setalarm(
        mach_timespec_t                 *alarm_time);
 
 void           sysclk_setalarm(
        mach_timespec_t                 *alarm_time);
 
-extern void (*IOKitRegisterInterruptHook)(void *,  int irq, int isclock);
-
-/*
- * Inlines to get timestamp counter value.
- */
-
-static inline void rdtsc_hilo(uint32_t *hi, uint32_t *lo) {
-        asm volatile("rdtsc": "=a" (*lo), "=d" (*hi));
-}
-
-static inline uint64_t rdtsc_64(void) {
-       uint64_t result;
-        asm volatile("rdtsc": "=A" (result));
-       return result;
-}
-
 /*
  * Lists of clock routines.
  */
 struct clock_ops  sysclk_ops = {
        sysclk_config,                  sysclk_init,
        sysclk_gettime,                 0,
 /*
  * Lists of clock routines.
  */
 struct clock_ops  sysclk_ops = {
        sysclk_config,                  sysclk_init,
        sysclk_gettime,                 0,
-       sysclk_getattr,                 sysclk_setattr,
+       sysclk_getattr,                 0,
        sysclk_setalarm,
 };
 
        sysclk_setalarm,
 };
 
@@ -107,9 +106,6 @@ int         calend_init(void);
 kern_return_t  calend_gettime(
        mach_timespec_t                 *cur_time);
 
 kern_return_t  calend_gettime(
        mach_timespec_t                 *cur_time);
 
-kern_return_t  calend_settime(
-       mach_timespec_t                 *cur_time);
-
 kern_return_t  calend_getattr(
        clock_flavor_t                  flavor,
        clock_attr_t                    attr,
 kern_return_t  calend_getattr(
        clock_flavor_t                  flavor,
        clock_attr_t                    attr,
@@ -117,63 +113,65 @@ kern_return_t     calend_getattr(
 
 struct clock_ops calend_ops = {
        calend_config,                  calend_init,
 
 struct clock_ops calend_ops = {
        calend_config,                  calend_init,
-       calend_gettime,                 calend_settime,
+       calend_gettime,                 0,
        calend_getattr,                 0,
        0,
 };
 
 /* local data declarations */
        calend_getattr,                 0,
        0,
 };
 
 /* local data declarations */
-mach_timespec_t                *RtcTime = (mach_timespec_t *)0;
-mach_timespec_t                *RtcAlrm;
-clock_res_t                    RtcDelt;
 
 
-/* global data declarations */
-struct {
-       uint64_t                        abstime;
+static clock_timer_func_t      rtclock_timer_expire;
+
+static timer_call_data_t       rtclock_alarm_timer;
 
 
-       mach_timespec_t         time;
-       mach_timespec_t         alarm_time;     /* time of next alarm */
+static void    rtclock_alarm_expire(
+                       timer_call_param_t      p0,
+                       timer_call_param_t      p1);
 
 
-       mach_timespec_t         calend_offset;
+struct {
+       mach_timespec_t                 calend_offset;
        boolean_t                       calend_is_set;
 
        boolean_t                       calend_is_set;
 
-       uint64_t                        timer_deadline;
-       boolean_t                       timer_is_set;
-       clock_timer_func_t      timer_expire;
+       int64_t                         calend_adjtotal;
+       int32_t                         calend_adjdelta;
+
+       uint32_t                        boottime;
 
 
-       clock_res_t                     new_ires;       /* pending new resolution (nano ) */
-       clock_res_t                     intr_nsec;      /* interrupt resolution (nano) */
+        mach_timebase_info_data_t      timebase_const;
 
        decl_simple_lock_data(,lock)    /* real-time clock device lock */
 } rtclock;
 
 
        decl_simple_lock_data(,lock)    /* real-time clock device lock */
 } rtclock;
 
-unsigned int           clknum;                        /* clks per second */
-unsigned int           new_clknum;                    /* pending clknum */
-unsigned int           time_per_clk;                  /* time per clk in ZHZ */
-unsigned int           clks_per_int;                  /* clks per interrupt */
-unsigned int           clks_per_int_99;
-int                                    rtc_intr_count;                /* interrupt counter */
-int                                    rtc_intr_hertz;                /* interrupts per HZ */
-int                                    rtc_intr_freq;                 /* interrupt frequency */
-int                                    rtc_print_lost_tick;           /* print lost tick */
+boolean_t              rtc_initialized = FALSE;
+clock_res_t            rtc_intr_nsec = NSEC_PER_HZ;    /* interrupt res */
+uint64_t               rtc_cycle_count;        /* clocks in 1/20th second */
+uint64_t               rtc_cyc_per_sec;        /* processor cycles per sec */
+uint32_t               rtc_boot_frequency;     /* provided by 1st speed-step */
+uint32_t               rtc_quant_scale;        /* clock to nanos multiplier */
+uint32_t               rtc_quant_shift;        /* clock to nanos right shift */
+uint64_t               rtc_decrementer_min;
 
 
-uint32_t               rtc_cyc_per_sec;                /* processor cycles per seconds */
-uint32_t               rtc_last_int_tsc_lo;            /* tsc values saved per interupt */
-uint32_t               rtc_last_int_tsc_hi;
+static mach_timebase_info_data_t       rtc_lapic_scale; /* nsec to lapic count */
 
 /*
 
 /*
- *     Macros to lock/unlock real-time clock device.
+ *     Macros to lock/unlock real-time clock data.
  */
  */
-#define LOCK_RTC(s)                                    \
-MACRO_BEGIN                                                    \
-       (s) = splclock();                               \
-       simple_lock(&rtclock.lock);             \
+#define RTC_INTRS_OFF(s)               \
+       (s) = splclock()
+
+#define RTC_INTRS_ON(s)                        \
+       splx(s)
+
+#define RTC_LOCK(s)                    \
+MACRO_BEGIN                            \
+       RTC_INTRS_OFF(s);               \
+       simple_lock(&rtclock.lock);     \
 MACRO_END
 
 MACRO_END
 
-#define UNLOCK_RTC(s)                          \
-MACRO_BEGIN                                                    \
+#define RTC_UNLOCK(s)                  \
+MACRO_BEGIN                            \
        simple_unlock(&rtclock.lock);   \
        simple_unlock(&rtclock.lock);   \
-       splx(s);                                                \
+       RTC_INTRS_ON(s);                \
 MACRO_END
 
 /*
 MACRO_END
 
 /*
@@ -182,114 +180,414 @@ MACRO_END
  * The i8254 is a traditional PC device with some arbitrary characteristics.
  * Basically, it is a register that counts at a fixed rate and can be
  * programmed to generate an interrupt every N counts.  The count rate is
  * The i8254 is a traditional PC device with some arbitrary characteristics.
  * Basically, it is a register that counts at a fixed rate and can be
  * programmed to generate an interrupt every N counts.  The count rate is
- * clknum counts per second (see pit.h), historically 1193167 we believe.
+ * clknum counts per sec (see pit.h), historically 1193167=14.318MHz/12
+ * but the more accurate value is 1193182=14.31818MHz/12. [14.31818 MHz being
+ * the master crystal oscillator reference frequency since the very first PC.]
  * Various constants are computed based on this value, and we calculate
  * them at init time for execution efficiency.  To obtain sufficient
  * accuracy, some of the calculation are most easily done in floating
  * point and then converted to int.
  *
  * Various constants are computed based on this value, and we calculate
  * them at init time for execution efficiency.  To obtain sufficient
  * accuracy, some of the calculation are most easily done in floating
  * point and then converted to int.
  *
- * We want an interrupt every 10 milliseconds, approximately.  The count
- * which will do that is clks_per_int.  However, that many counts is not
- * *exactly* 10 milliseconds; it is a bit more or less depending on
- * roundoff.  The actual time per tick is calculated and saved in
- * rtclock.intr_nsec, and it is that value which is added to the time
- * register on each tick.
- *
- * The i8254 counter can be read between interrupts in order to determine
- * the time more accurately.  The counter counts down from the preset value
- * toward 0, and we have to handle the case where the counter has been
- * reset just before being read and before the interrupt has been serviced.
- * Given a count since the last interrupt, the time since then is given
- * by (count * time_per_clk).  In order to minimize integer truncation,
- * we perform this calculation in an arbitrary unit of time which maintains
- * the maximum precision, i.e. such that one tick is 1.0e9 of these units,
- * or close to the precision of a 32-bit int.  We then divide by this unit
- * (which doesn't lose precision) to get nanoseconds.  For notation
- * purposes, this unit is defined as ZHZ = zanoseconds per nanosecond.
+ */
+
+/*
+ * Forward decl.
+ */
+
+static uint64_t        rtc_set_cyc_per_sec(uint64_t cycles);
+uint64_t       rtc_nanotime_read(void);
+
+/*
+ * create_mul_quant_GHZ
+ *   create a constant used to multiply the TSC by to convert to nanoseconds.
+ *   This is a 32 bit number and the TSC *MUST* have a frequency higher than
+ *   1000Mhz for this routine to work.
  *
  *
- * This sequence to do all this is in sysclk_gettime.  For efficiency, this
- * sequence also needs the value that the counter will have if it has just
- * overflowed, so we precompute that also.  ALSO, certain platforms
- * (specifically the DEC XL5100) have been observed to have problem
- * with latching the counter, and they occasionally (say, one out of
- * 100,000 times) return a bogus value.  Hence, the present code reads
- * the counter twice and checks for a consistent pair of values.
+ * The theory here is that we know how many TSCs-per-sec the processor runs at.
+ * Normally to convert this to nanoseconds you would multiply the current
+ * timestamp by 1000000000 (a billion) then divide by TSCs-per-sec.
+ * Unfortunately the TSC is 64 bits, which would leave us with 96 bit intermediate
+ * results from the multiply that must then be divided.
+ * Usually that's
+ *   uint96 = tsc * numer
+ *   nanos = uint96 / denom
+ * Instead, we create this quant constant and it becomes the numerator,
+ * the denominator can then be 0x100000000 which makes our division as simple as
+ * forgetting the lower 32 bits of the result. We can also pass this number to
+ * user space as the numer and pass 0xFFFFFFFF (RTC_FAST_DENOM) as the denom to
+ * convert raw counts to nanos. The difference is so small as to be
+ * undetectable by anything.
  *
  *
- * Some attributes of the rt clock can be changed, including the
- * interrupt resolution.  We default to the minimum resolution (10 ms),
- * but allow a finer resolution to be requested.  The assumed frequency
- * of the clock can also be set since it appears that the actual
- * frequency of real-world hardware can vary from the nominal by
- * 200 ppm or more.  When the frequency is set, the values above are
- * recomputed and we continue without resetting or changing anything else.
+ * Unfortunately we cannot do this for sub-GHz processors. In this case, all
+ * we do is pass the CPU speed in raw as the denom and we pass in 1000000000
+ * as the numerator. No short cuts allowed
  */
  */
-#define RTC_MINRES     (NSEC_PER_SEC / HZ)     /* nsec per tick */
-#define        RTC_MAXRES      (RTC_MINRES / 20)       /* nsec per tick */
-#define        ZANO            (1000000000)
-#define ZHZ             (ZANO / (NSEC_PER_SEC / HZ))
-#define READ_8254(val) { \
-        outb(PITCTL_PORT, PIT_C0);             \
-       (val) = inb(PITCTR0_PORT);               \
-       (val) |= inb(PITCTR0_PORT) << 8 ; }
-
+#define RTC_FAST_DENOM 0xFFFFFFFF
+inline static uint32_t
+create_mul_quant_GHZ(int shift, uint32_t quant)
+{
+       return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
+}
 /*
 /*
- * Calibration delay counts.
+ * This routine takes a value of raw TSC ticks and applies the passed mul_quant
+ * generated by create_mul_quant() This is our internal routine for creating
+ * nanoseconds.
+ * Since we don't really have uint96_t this routine basically does this....
+ *   uint96_t intermediate = (*value) * scale
+ *   return (intermediate >> 32)
  */
  */
-unsigned int   delaycount = 100;
-unsigned int   microdata = 50;
/*
 * Multiply a 64-bit tsc value by the 32-bit 'scale' constant (from
 * create_mul_quant_GHZ) and return the top 64 bits of the 96-bit
 * product -- i.e. (value * scale) >> 32, which is nanoseconds.
 * i386 only: 'value' lives in %edx:%eax via the "A" constraint; the
 * asm does two 32-bit mulls and adds the cross terms with carry.
 */
inline static uint64_t
fast_get_nano_from_abs(uint64_t value, int scale)
{
    asm ("	movl	%%edx,%%esi	\n\t"
         "	mull	%%ecx		\n\t"
         "	movl	%%edx,%%edi	\n\t"
         "	movl	%%esi,%%eax	\n\t"
         "	mull	%%ecx		\n\t"
         "	xorl	%%ecx,%%ecx	\n\t"	
         "	addl	%%edi,%%eax	\n\t"	
         "	adcl	%%ecx,%%edx	    "
		: "+A" (value)
		: "c" (scale)
		: "%esi", "%edi");
    return value;
}
 
 /*
 
 /*
- * Forward decl.
+ * This routine basically does this...
+ * ts.tv_sec = nanos / 1000000000;     create seconds
+ * ts.tv_nsec = nanos % 1000000000;    create remainder nanos
  */
  */
/*
 * Split a nanosecond count into a mach_timespec_t:
 *   tv_sec  = nanos / NSEC_PER_SEC
 *   tv_nsec = nanos % NSEC_PER_SEC
 * The divl leaves quotient in %eax and remainder in %edx; the union
 * overlays that %edx:%eax pair directly onto the two 32-bit timespec
 * fields (i386 layout assumption -- NOTE(review): relies on the field
 * order of mach_timespec_t matching the "A" register pair).
 */
inline static mach_timespec_t 
nanos_to_timespec(uint64_t nanos)
{
	union {
		mach_timespec_t ts;
		uint64_t u64;
	} ret;
        ret.u64 = nanos;
        asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC));
        return ret.ts;
}
 
 
-extern int   measure_delay(int us);
-void         rtc_setvals( unsigned int, clock_res_t );
+/*
+ * The following two routines perform the 96 bit arithmetic we need to
+ * convert generic absolute<->nanoseconds
+ * The multiply routine takes a uint64_t and a uint32_t and returns the result
+ * in a uint32_t[3] array.
+ * The divide routine takes this uint32_t[3] array and divides it by a uint32_t
+ * returning a uint64_t
+ */
/*
 * 96-bit multiply helper for absolute<->nanosecond conversion (see the
 * block comment above): multiplies the 64-bit *abstime by the 32-bit
 * multiplicand and stores the 96-bit product into result[0..2],
 * least-significant word first.  i386 only.
 */
inline static void
longmul(uint64_t	*abstime, uint32_t multiplicand, uint32_t *result)
{
    asm volatile(
        " pushl	%%ebx			\n\t"	
        " movl	%%eax,%%ebx		\n\t"
        " movl	(%%eax),%%eax		\n\t"
        " mull	%%ecx			\n\t"
        " xchg	%%eax,%%ebx		\n\t"
        " pushl	%%edx			\n\t"
        " movl	4(%%eax),%%eax		\n\t"
        " mull	%%ecx			\n\t"
        " movl	%2,%%ecx		\n\t"
        " movl	%%ebx,(%%ecx)		\n\t"
        " popl	%%ebx			\n\t"
        " addl	%%ebx,%%eax		\n\t"
        " popl	%%ebx			\n\t"
        " movl	%%eax,4(%%ecx)		\n\t"
        " adcl	$0,%%edx		\n\t"
        " movl	%%edx,8(%%ecx)	// and save it"
        : : "a"(abstime), "c"(multiplicand), "m"(result));
    
}
 
 
-static void  rtc_set_cyc_per_sec();
/*
 * 96-bit divide helper, companion to longmul(): divides the 96-bit
 * value in numer[0..2] (least-significant word first) by the 32-bit
 * denom, returning the 64-bit quotient.  i386 only; performs two
 * chained 64/32 divls (high words first, remainder carried in %edx).
 */
inline static uint64_t
longdiv(uint32_t *numer, uint32_t denom)
{
    uint64_t	result;
    asm volatile(
        " pushl	%%ebx			\n\t"
        " movl	%%eax,%%ebx		\n\t"
        " movl	8(%%eax),%%edx		\n\t"
        " movl	4(%%eax),%%eax		\n\t"
        " divl	%%ecx			\n\t"
        " xchg	%%ebx,%%eax		\n\t"
        " movl	(%%eax),%%eax		\n\t"
        " divl	%%ecx			\n\t"
        " xchg	%%ebx,%%edx		\n\t"
        " popl	%%ebx			\n\t"
        : "=A"(result) : "a"(numer),"c"(denom));
    return result;
}
 
 /*
 
 /*
- * Initialize non-zero clock structure values.
+ * Enable or disable timer 2.
+ * Port 0x61 controls timer 2:
+ *   bit 0 gates the clock,
+ *   bit 1 gates output to speaker.
  */
  */
-void
-rtc_setvals(
-       unsigned int new_clknum,
-       clock_res_t  new_ires
-       )
/*
 * Gate the clock into PIT timer 2 (port 0x61 bit 0) while keeping the
 * speaker output disabled (bit 1 cleared by the 0xFC mask).
 */
inline static void
enable_PIT2(void)
{
    asm volatile(
        " inb   $0x61,%%al      \n\t"
        " and   $0xFC,%%al       \n\t"
        " or    $1,%%al         \n\t"
        " outb  %%al,$0x61      \n\t"
        : : : "%al" );
}
 
 
-    clknum = new_clknum;
-    rtc_intr_freq = (NSEC_PER_SEC / new_ires);
-    rtc_intr_hertz = rtc_intr_freq / HZ;
-    clks_per_int = (clknum + (rtc_intr_freq / 2)) / rtc_intr_freq;
-    clks_per_int_99 = clks_per_int - clks_per_int/100;
/*
 * Ungate the clock from PIT timer 2: clear port 0x61 bits 0 and 1
 * (clock gate and speaker output).
 */
inline static void
disable_PIT2(void)
{
    asm volatile(
        " inb   $0x61,%%al      \n\t"
        " and   $0xFC,%%al      \n\t"
        " outb  %%al,$0x61      \n\t"
        : : : "%al" );
}
 
 
-    /*
-     * The following calculations are done with scaling integer operations
-     * in order that the integer results are accurate to the lsb.
-     */
-    timeperclk = div_scale(ZANO, clknum, &scale0);     /* 838.105647 nsec */
/*
 * Load a 16-bit count into PIT counter 2 in one-shot mode and spin
 * until the counter has latched the new value.
 */
inline static void
set_PIT2(int value)
{
/*
 * First, tell the clock we are going to write 16 bits to the counter
 *   and enable one-shot mode (command 0xB8 to port 0x43)
 * Then write the two bytes into the PIT2 clock register (port 0x42).
 * Loop until the value is "realized" in the clock,
 * this happens on the next tick.
 */
    asm volatile(
        " movb  $0xB8,%%al      \n\t"
        " outb	%%al,$0x43	\n\t"
        " movb	%%dl,%%al	\n\t"
        " outb	%%al,$0x42	\n\t"
        " movb	%%dh,%%al	\n\t"
        " outb	%%al,$0x42	\n"
"1:	  inb	$0x42,%%al	\n\t" 
        " inb	$0x42,%%al	\n\t"
        " cmp	%%al,%%dh	\n\t"
        " jne	1b"
        : : "d"(value) : "%al");
}
 
 
-    time_per_clk = mul_scale(ZHZ, timeperclk, &scale1);        /* 83810 */
-    if (scale0 > scale1)
-       time_per_clk >>= (scale0 - scale1);
-    else if (scale0 < scale1)
-       panic("rtc_clock: time_per_clk overflow\n");
/*
 * Latch and read the current PIT counter 2 value into *value, and
 * return the tsc (rdtsc) taken between the latch and the read so the
 * caller can correlate PIT time with cpu-clock time.
 */
inline static uint64_t
get_PIT2(unsigned int *value)
{
    register uint64_t	result;
/*
 * This routine first latches the time (command 0x80 to port 0x43),
 * then gets the time stamp so we know how long the read will take later.
 * Read (from port 0x42) and return the current value of the timer.
 */
    asm volatile(
        " xorl	%%ecx,%%ecx	\n\t"
        " movb	$0x80,%%al	\n\t"
        " outb	%%al,$0x43	\n\t"
        " rdtsc			\n\t"
        " pushl	%%eax		\n\t"
        " inb	$0x42,%%al	\n\t"
        " movb	%%al,%%cl	\n\t"
        " inb	$0x42,%%al	\n\t"
        " movb	%%al,%%ch	\n\t"
        " popl	%%eax	"
        : "=A"(result), "=c"(*value));
    return result;
}
 
 
+/*
+ * timeRDTSC()
+ * This routine sets up PIT counter 2 to count down 1/20 of a second.
+ * It pauses until the value is latched in the counter
+ * and then reads the time stamp counter to return to the caller.
+ */
+static uint64_t
+timeRDTSC(void)
+{
+    int                attempts = 0;
+    uint64_t   latchTime;
+    uint64_t   saveTime,intermediate;
+    unsigned int timerValue, lastValue;
+    boolean_t   int_enabled;
     /*
     /*
-     * Notice that rtclock.intr_nsec is signed ==> use unsigned int res
+     * Table of correction factors to account for
+     *   - timer counter quantization errors, and
+     *   - undercounts 0..5
      */
      */
-    res = mul_scale(clks_per_int, timeperclk, &scale1);        /* 10000276 */
-    if (scale0 > scale1)
-       rtclock.intr_nsec = res >> (scale0 - scale1);
-    else
-       panic("rtc_clock: rtclock.intr_nsec overflow\n");
-
-    rtc_intr_count = 1;
-    RtcDelt = rtclock.intr_nsec/2;
+#define        SAMPLE_CLKS_EXACT       (((double) CLKNUM) / 20.0)
+#define        SAMPLE_CLKS_INT         ((int) CLKNUM / 20)
+#define SAMPLE_NSECS           (2000000000LL)
+#define SAMPLE_MULTIPLIER      (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
+#define ROUND64(x)             ((uint64_t)((x) + 0.5))
+    uint64_t   scale[6] = {
+       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), 
+       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), 
+       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), 
+       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), 
+       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), 
+       ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
+    };
+                            
+    int_enabled = ml_set_interrupts_enabled(FALSE);
+    
+restart:
+    if (attempts >= 2)
+       panic("timeRDTSC() calibation failed with %d attempts\n", attempts);
+    attempts++;
+    enable_PIT2();      // turn on PIT2
+    set_PIT2(0);       // reset timer 2 to be zero
+    latchTime = rdtsc64();     // get the time stamp to time 
+    latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
+    set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second
+    saveTime = rdtsc64();      // now time how long a 20th a second is...
+    get_PIT2(&lastValue);
+    get_PIT2(&lastValue);      // read twice, first value may be unreliable
+    do {
+        intermediate = get_PIT2(&timerValue);
+        if (timerValue > lastValue) {
+           printf("Hey we are going backwards! %u -> %u, restarting timing\n",
+                       timerValue,lastValue);
+           set_PIT2(0);
+           disable_PIT2();
+           goto restart;
+       }
+        lastValue = timerValue;
+    } while (timerValue > 5);
+    kprintf("timerValue   %d\n",timerValue);
+    kprintf("intermediate 0x%016llx\n",intermediate);
+    kprintf("saveTime     0x%016llx\n",saveTime);
+    
+    intermediate -= saveTime;          // raw count for about 1/20 second
+    intermediate *= scale[timerValue]; // rescale measured time spent
+    intermediate /= SAMPLE_NSECS;      // so its exactly 1/20 a second
+    intermediate += latchTime;         // add on our save fudge
+    
+    set_PIT2(0);                       // reset timer 2 to be zero
+    disable_PIT2();                    // turn off PIT 2
+
+    ml_set_interrupts_enabled(int_enabled);
+    return intermediate;
+}
+
/*
 * Convert a tsc tick count to nanoseconds using the current timebase
 * constants in rtclock.timebase_const.  A denom of RTC_FAST_DENOM
 * selects the single-multiply fast path (>1 GHz cpus); otherwise the
 * full 96-bit multiply/divide is used (see create_mul_quant_GHZ).
 */
static uint64_t
tsc_to_nanoseconds(uint64_t abstime)
{
        uint32_t	numer;
        uint32_t	denom;
        uint32_t	intermediate[3];
        
        numer = rtclock.timebase_const.numer;
        denom = rtclock.timebase_const.denom;
        if (denom == RTC_FAST_DENOM) {
            abstime = fast_get_nano_from_abs(abstime, numer);
        } else {
            longmul(&abstime, numer, intermediate);
            abstime = longdiv(intermediate, denom);
        }
        return abstime;
}
+
+inline static mach_timespec_t 
+tsc_to_timespec(void)
+{
+        uint64_t       currNanos;
+        currNanos = rtc_nanotime_read();
+        return nanos_to_timespec(currNanos);
+}
+
+#define        DECREMENTER_MAX         UINT_MAX
+static uint32_t
+deadline_to_decrementer(
+       uint64_t        deadline,
+       uint64_t        now)
+{
+       uint64_t        delta;
+
+       if (deadline <= now)
+               return rtc_decrementer_min;
+       else {
+               delta = deadline - now;
+               return MIN(MAX(rtc_decrementer_min,delta),DECREMENTER_MAX); 
+       }
+}
+
/*
 * Count the local apic timer down from initial_count to zero (one-shot,
 * divide-by-1, interrupt not delivered) while timing the countdown with
 * the tsc.  Returns the elapsed time converted to nanoseconds.
 * Interrupts are disabled so the measurement is not perturbed.
 */
static inline uint64_t
lapic_time_countdown(uint32_t initial_count)
{
	boolean_t		state;
	uint64_t		start_time;
	uint64_t		stop_time;
	lapic_timer_count_t	count;

	state = ml_set_interrupts_enabled(FALSE);
	lapic_set_timer(FALSE, one_shot, divide_by_1, initial_count);
	start_time = rdtsc64();
	do {
		lapic_get_timer(NULL, NULL, NULL, &count);
	} while (count > 0);
	stop_time = rdtsc64();
	ml_set_interrupts_enabled(state);

	return tsc_to_nanoseconds(stop_time - start_time);
}
+
/*
 * Calibrate the local apic timer against tsc-derived nanoseconds,
 * producing rtc_lapic_scale (numer/denom) -- the ratio later used by
 * rtc_lapic_set_timer() to convert a nanosecond interval into a lapic
 * count.  No-op when the cpu reports no local apic.
 */
static void
rtc_lapic_timer_calibrate(void)
{
	uint32_t	nsecs;
	uint64_t	countdown;

	if (!(cpuid_features() & CPUID_FEATURE_APIC))
		return;

	/*
	 * Set the local apic timer counting down to zero without an interrupt.
	 * Use the timestamp to calculate how long this takes.
	 */ 
	nsecs = (uint32_t) lapic_time_countdown(rtc_intr_nsec);

	/*
	 * Compute a countdown ratio for a given time in nanoseconds.
	 * That is, countdown = time * numer / denom.
	 */
	countdown = (uint64_t)rtc_intr_nsec * (uint64_t)rtc_intr_nsec / nsecs;

	/* Second pass: re-measure with the computed countdown for accuracy. */
	nsecs = (uint32_t) lapic_time_countdown((uint32_t) countdown);

	rtc_lapic_scale.numer = countdown;
	rtc_lapic_scale.denom = nsecs;

	kprintf("rtc_lapic_timer_calibrate() scale: %d/%d\n",
		(uint32_t) countdown, nsecs);
}
+
+static void
+rtc_lapic_set_timer(
+       uint32_t        interval)
+{
+       uint64_t        count;
+
+       assert(rtc_lapic_scale.denom);
+
+       count = interval * (uint64_t) rtc_lapic_scale.numer;
+       count /= rtc_lapic_scale.denom;
+
+       lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count);
+}
+
+static void
+rtc_lapic_start_ticking(void)
+{
+       uint64_t        abstime;
+       uint64_t        first_tick;
+       uint64_t        decr;
+
+       abstime = mach_absolute_time();
+       first_tick = abstime + NSEC_PER_HZ;
+       current_cpu_datap()->cpu_rtc_tick_deadline = first_tick;
+       decr = deadline_to_decrementer(first_tick, abstime);
+       rtc_lapic_set_timer(decr);
 }
 
 /*
 }
 
 /*
@@ -300,267 +598,415 @@ rtc_setvals(
 int
 sysclk_config(void)
 {
 int
 sysclk_config(void)
 {
-       int     RtcFlag;
-       int     pic;
 
 
-#if    NCPUS > 1
        mp_disable_preemption();
        if (cpu_number() != master_cpu) {
                mp_enable_preemption();
                return(1);
        }
        mp_enable_preemption();
        mp_disable_preemption();
        if (cpu_number() != master_cpu) {
                mp_enable_preemption();
                return(1);
        }
        mp_enable_preemption();
-#endif
+
+       timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL);
+
+       simple_lock_init(&rtclock.lock, 0);
+
+       return (1);
+}
+
+
+/*
+ * Nanotime/mach_absolutime_time
+ * -----------------------------
+ * The timestamp counter (tsc) - which counts cpu clock cycles and can be read
+ * efficient by the kernel and in userspace - is the reference for all timing.
+ * However, the cpu clock rate is not only platform-dependent but can change
+ * (speed-step) dynamically. Hence tsc is converted into nanoseconds which is
+ * identical to mach_absolute_time. The conversion to tsc to nanoseconds is
+ * encapsulated by nanotime.
+ *
+ * The kernel maintains nanotime information recording:
+ *     - the current ratio of tsc to nanoseconds
+ *       with this ratio expressed as a 32-bit scale and shift
+ *       (power of 2 divider);
+ *     - the tsc (step_tsc) and nanotime (step_ns) at which the current
+ *       ratio (clock speed) began.
+ * So a tsc value can be converted to nanotime by:
+ *
+ *     nanotime = (((tsc - step_tsc)*scale) >> shift) + step_ns
+ *
+ * In general, (tsc - step_tsc) is a 64-bit quantity with the scaling
+ * involving a 96-bit intermediate value. However, by saving the converted 
+ * values at each tick (or at any intervening speed-step) - base_tsc and
+ * base_ns - we can perform conversions relative to these and be assured that
+ * (tsc - tick_tsc) is 32-bits. Hence:
+ *
+ *     fast_nanotime = (((tsc - base_tsc)*scale) >> shift) + base_ns  
+ *
+ * The tuple {base_tsc, base_ns, scale, shift} is exported in the commpage 
+ * for the userspace nanotime routine to read. A duplicate check_tsc is
+ * appended so that the consistency of the read can be verified. Note that
+ * this scheme is essential for MP systems in which the commpage is updated
+ * by the master cpu but may be read concurrently by other cpus.
+ * 
+ */
+static inline void
+rtc_nanotime_set_commpage(rtc_nanotime_t *rntp)
+{
+       commpage_nanotime_t     cp_nanotime;
+
+       /* Only the master cpu updates the commpage */
+       if (cpu_number() != master_cpu)
+               return;
+
+       cp_nanotime.nt_base_tsc = rntp->rnt_tsc;
+       cp_nanotime.nt_base_ns = rntp->rnt_nanos;
+       cp_nanotime.nt_scale = rntp->rnt_scale;
+       cp_nanotime.nt_shift = rntp->rnt_shift;
+
+       commpage_set_nanotime(&cp_nanotime);
+}
+
+static void
+rtc_nanotime_init(void)
+{
+       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+       rtc_nanotime_t  *master_rntp = &cpu_datap(master_cpu)->cpu_rtc_nanotime;
+
+       if (cpu_number() == master_cpu) {
+               rntp->rnt_tsc = rdtsc64();
+               rntp->rnt_nanos = tsc_to_nanoseconds(rntp->rnt_tsc);
+               rntp->rnt_scale = rtc_quant_scale;
+               rntp->rnt_shift = rtc_quant_shift;
+               rntp->rnt_step_tsc = 0ULL;
+               rntp->rnt_step_nanos = 0ULL;
+       } else {
+               /*
+                * Copy master processor's nanotime info.
+                * Loop required in case this changes while copying.
+                */
+               do {
+                       *rntp = *master_rntp;
+               } while (rntp->rnt_tsc != master_rntp->rnt_tsc);
+       }
+}
+
+static inline void
+_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t    tsc)
+{
+       uint64_t        tsc_delta;
+       uint64_t        ns_delta;
+
+       tsc_delta = tsc - rntp->rnt_step_tsc;
+       ns_delta = tsc_to_nanoseconds(tsc_delta);
+       rntp->rnt_nanos = rntp->rnt_step_nanos + ns_delta;
+       rntp->rnt_tsc = tsc;
+}
+
+static void
+rtc_nanotime_update(void)
+{
+       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+       assert(get_preemption_level() > 0);
+       assert(!ml_get_interrupts_enabled());
+        
+       _rtc_nanotime_update(rntp, rdtsc64());
+       rtc_nanotime_set_commpage(rntp);
+}
+
+static void
+rtc_nanotime_scale_update(void)
+{
+       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+       uint64_t        tsc = rdtsc64();
+
+       assert(!ml_get_interrupts_enabled());
+        
        /*
        /*
-        * Setup device.
+        * Update time based on past scale.
         */
         */
-#if    MP_V1_1
-    {
-       extern boolean_t mp_v1_1_initialized;
-       if (mp_v1_1_initialized)
-           pic = 2;
-       else
-           pic = 0;
-    }
-#else
-       pic = 0;        /* FIXME .. interrupt registration moved to AppleIntelClock */
-#endif
+       _rtc_nanotime_update(rntp, tsc);
 
 
+       /*
+        * Update scale and timestamp this update.
+        */
+       rntp->rnt_scale = rtc_quant_scale;
+       rntp->rnt_shift = rtc_quant_shift;
+       rntp->rnt_step_tsc = rntp->rnt_tsc;
+       rntp->rnt_step_nanos = rntp->rnt_nanos;
+
+       /* Export update to userland */
+       rtc_nanotime_set_commpage(rntp);
+}
+
+static uint64_t
+_rtc_nanotime_read(void)
+{
+       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+       uint64_t        rnt_tsc;
+       uint32_t        rnt_scale;
+       uint32_t        rnt_shift;
+       uint64_t        rnt_nanos;
+       uint64_t        tsc;
+       uint64_t        tsc_delta;
+
+       rnt_scale = rntp->rnt_scale;
+       if (rnt_scale == 0)
+               return 0ULL;
+
+       rnt_shift = rntp->rnt_shift;
+       rnt_nanos = rntp->rnt_nanos;
+       rnt_tsc = rntp->rnt_tsc;
+       tsc = rdtsc64();
+
+       tsc_delta = tsc - rnt_tsc;
+       if ((tsc_delta >> 32) != 0)
+               return rnt_nanos + tsc_to_nanoseconds(tsc_delta);
+
+       /* Let the compiler optimize(?): */
+       if (rnt_shift == 32)
+               return rnt_nanos + ((tsc_delta * rnt_scale) >> 32);     
+       else 
+               return rnt_nanos + ((tsc_delta * rnt_scale) >> rnt_shift);
+}
+
+uint64_t
+rtc_nanotime_read(void)
+{
+       uint64_t        result;
+       uint64_t        rnt_tsc;
+       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+       /*
+        * Use timestamp to ensure the uptime record isn't changed.
+        * This avoids disabling interrupts.
+        * And not this is a per-cpu structure hence no locking.
+        */
+       do {
+               rnt_tsc = rntp->rnt_tsc;
+               result = _rtc_nanotime_read();
+       } while (rnt_tsc != rntp->rnt_tsc);
+
+       return result;
+}
+
+
+/*
+ * This function is called by the speed-step driver when a
+ * change of cpu clock frequency is about to occur.
+ * The scale is not changed until rtc_clock_stepped() is called.
+ * Between these times there is an uncertainty is exactly when
+ * the change takes effect. FIXME: by using another timing source
+ * we could eliminate this error.
+ */
+void
+rtc_clock_stepping(__unused uint32_t new_frequency,
+                  __unused uint32_t old_frequency)
+{
+       boolean_t       istate;
+
+       istate = ml_set_interrupts_enabled(FALSE);
+       rtc_nanotime_scale_update();
+       ml_set_interrupts_enabled(istate);
+}
+
+/*
+ * This function is called by the speed-step driver when a
+ * change of cpu clock frequency has just occured. This change
+ * is expressed as a ratio relative to the boot clock rate.
+ */
+void
+rtc_clock_stepped(uint32_t new_frequency, uint32_t old_frequency)
+{
+       boolean_t       istate;
+
+       istate = ml_set_interrupts_enabled(FALSE);
+       if (rtc_boot_frequency == 0) {
+               /*
+                * At the first ever stepping, old frequency is the real
+                * initial clock rate. This step and all others are based
+                * relative to this initial frequency at which the tsc
+                * calibration was made. Hence we must remember this base
+                * frequency as reference.
+                */
+               rtc_boot_frequency = old_frequency;
+       }
+       rtc_set_cyc_per_sec(rtc_cycle_count * new_frequency /
+                               rtc_boot_frequency);
+       rtc_nanotime_scale_update();
+       ml_set_interrupts_enabled(istate);
+}
+
+/*
+ * rtc_sleep_wakeup() is called from acpi on awakening from a S3 sleep
+ */
+void
+rtc_sleep_wakeup(void)
+{
+       rtc_nanotime_t  *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+       boolean_t       istate;
+
+       istate = ml_set_interrupts_enabled(FALSE);
 
        /*
 
        /*
-        * We should attempt to test the real-time clock
-        * device here. If it were to fail, we should panic
-        * the system.
+        * Reset nanotime.
+        * The timestamp counter will have been reset
+        * but nanotime (uptime) marches onward.
+        * We assume that we're still at the former cpu frequency.
         */
         */
-       RtcFlag = /* test device */1;
-       printf("realtime clock configured\n");
+       rntp->rnt_tsc = rdtsc64();
+       rntp->rnt_step_tsc = 0ULL;
+       rntp->rnt_step_nanos = rntp->rnt_nanos;
+       rtc_nanotime_set_commpage(rntp);
+
+       /* Restart tick interrupts from the LAPIC timer */
+       rtc_lapic_start_ticking();
 
 
-       simple_lock_init(&rtclock.lock, ETAP_NO_TRACE);
-       return (RtcFlag);
+       ml_set_interrupts_enabled(istate);
 }
 
 /*
 }
 
 /*
- * Initialize the real-time clock device. Return success (1)
- * or failure (0). Since the real-time clock is required to
- * provide canonical mapped time, we allocate a page to keep
- * the clock time value. In addition, various variables used
- * to support the clock are initialized.  Note: the clock is
- * not started until rtclock_reset is called.
+ * Initialize the real-time clock device.
+ * In addition, various variables used to support the clock are initialized.
  */
 int
 sysclk_init(void)
 {
  */
 int
 sysclk_init(void)
 {
-       vm_offset_t     *vp;
-#if    NCPUS > 1
+       uint64_t        cycles;
+
        mp_disable_preemption();
        mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return(1);
+       if (cpu_number() == master_cpu) {
+               /*
+                * Perform calibration.
+                * The PIT is used as the reference to compute how many
+                * TCS counts (cpu clock cycles) occur per second.
+                */
+               rtc_cycle_count = timeRDTSC();
+               cycles = rtc_set_cyc_per_sec(rtc_cycle_count);
+
+               /*
+                * Set min/max to actual.
+                * ACPI may update these later if speed-stepping is detected.
+                */
+               gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
+               gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
+               printf("[RTCLOCK] frequency %llu (%llu)\n",
+                      cycles, rtc_cyc_per_sec);
+
+               rtc_lapic_timer_calibrate();
+
+               /* Minimum interval is 1usec */
+               rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC,
+                                                               0ULL);
+               /* Point LAPIC interrupts to hardclock() */
+               lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
+
+               clock_timebase_init();
+               rtc_initialized = TRUE;
        }
        }
+
+       rtc_nanotime_init();
+
+       rtc_lapic_start_ticking();
+
        mp_enable_preemption();
        mp_enable_preemption();
-#endif
 
 
-       RtcTime = &rtclock.time;
-       rtc_setvals( CLKNUM, RTC_MINRES );  /* compute constants */
-       rtc_set_cyc_per_sec();  /* compute number of tsc beats per second */
        return (1);
 }
 
        return (1);
 }
 
-static volatile unsigned int     last_ival = 0;
-
 /*
  * Get the clock device time. This routine is responsible
  * for converting the device's machine dependent time value
  * into a canonical mach_timespec_t value.
  */
 /*
  * Get the clock device time. This routine is responsible
  * for converting the device's machine dependent time value
  * into a canonical mach_timespec_t value.
  */
-kern_return_t
-sysclk_gettime(
+static kern_return_t
+sysclk_gettime_internal(
        mach_timespec_t *cur_time)      /* OUT */
 {
        mach_timespec_t *cur_time)      /* OUT */
 {
-        mach_timespec_t        itime = {0, 0};
-       unsigned int    val, val2;
-       int             s;
-
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return (KERN_SUCCESS);
-       }
-
-       /*
-        * Inhibit interrupts. Determine the incremental
-        * time since the last interrupt. (This could be
-        * done in assembler for a bit more speed).
-        */
-       LOCK_RTC(s);
-       do {
-           READ_8254(val);                 /* read clock */
-           READ_8254(val2);                /* read clock */
-       } while ( val2 > val || val2 < val - 10 );
-       if ( val > clks_per_int_99 ) {
-           outb( 0x0a, 0x20 );             /* see if interrupt pending */
-           if ( inb( 0x20 ) & 1 )
-               itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */
-       }
-       itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ;
-       if ( itime.tv_nsec < last_ival ) {
-           if (rtc_print_lost_tick)
-               printf( "rtclock: missed clock interrupt.\n" );
-       }
-       last_ival = itime.tv_nsec;
-       cur_time->tv_sec = rtclock.time.tv_sec;
-       cur_time->tv_nsec = rtclock.time.tv_nsec;
-       UNLOCK_RTC(s);
-       ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime));
+       *cur_time = tsc_to_timespec();
        return (KERN_SUCCESS);
 }
 
 kern_return_t
        return (KERN_SUCCESS);
 }
 
 kern_return_t
-sysclk_gettime_internal(
+sysclk_gettime(
        mach_timespec_t *cur_time)      /* OUT */
 {
        mach_timespec_t *cur_time)      /* OUT */
 {
-        mach_timespec_t        itime = {0, 0};
-       unsigned int    val, val2;
-
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return (KERN_SUCCESS);
-       }
-
-       /*
-        * Inhibit interrupts. Determine the incremental
-        * time since the last interrupt. (This could be
-        * done in assembler for a bit more speed).
-        */
-       do {
-           READ_8254(val);                 /* read clock */
-           READ_8254(val2);                /* read clock */
-       } while ( val2 > val || val2 < val - 10 );
-       if ( val > clks_per_int_99 ) {
-           outb( 0x0a, 0x20 );             /* see if interrupt pending */
-           if ( inb( 0x20 ) & 1 )
-               itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */
-       }
-       itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ;
-       if ( itime.tv_nsec < last_ival ) {
-           if (rtc_print_lost_tick)
-               printf( "rtclock: missed clock interrupt.\n" );
-       }
-       last_ival = itime.tv_nsec;
-       cur_time->tv_sec = rtclock.time.tv_sec;
-       cur_time->tv_nsec = rtclock.time.tv_nsec;
-       ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime));
-       return (KERN_SUCCESS);
+       return sysclk_gettime_internal(cur_time);
 }
 
 }
 
-/*
- * Get the clock device time when ALL interrupts are already disabled.
- * Same as above except for turning interrupts off and on.
- * This routine is responsible for converting the device's machine dependent
- * time value into a canonical mach_timespec_t value.
- */
 void
 sysclk_gettime_interrupts_disabled(
        mach_timespec_t *cur_time)      /* OUT */
 {
 void
 sysclk_gettime_interrupts_disabled(
        mach_timespec_t *cur_time)      /* OUT */
 {
-       mach_timespec_t itime = {0, 0};
-       unsigned int    val;
+       (void) sysclk_gettime_internal(cur_time);
+}
 
 
-       if (!RtcTime) {
-               /* Uninitialized */
-               cur_time->tv_nsec = 0;
-               cur_time->tv_sec = 0;
-               return;
-       }
+// utility routine 
+// Code to calculate how many processor cycles are in a second...
 
 
-       simple_lock(&rtclock.lock);
+static uint64_t
+rtc_set_cyc_per_sec(uint64_t cycles)
+{
+
+        if (cycles > (NSEC_PER_SEC/20)) {
+            // we can use just a "fast" multiply to get nanos
+           rtc_quant_shift = 32;
+            rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles);
+            rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20
+           rtclock.timebase_const.denom = RTC_FAST_DENOM;
+        } else {
+           rtc_quant_shift = 26;
+            rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles);
+            rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20
+            rtclock.timebase_const.denom = cycles;
+        }
+       rtc_cyc_per_sec = cycles*20;    // multiply it by 20 and we are done..
+                                       // BUT we also want to calculate...
+
+        cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
+                       / UI_CPUFREQ_ROUNDING_FACTOR)
+                               * UI_CPUFREQ_ROUNDING_FACTOR;
 
        /*
 
        /*
-        * Copy the current time knowing that we cant be interrupted
-        * between the two longwords and so dont need to use MTS_TO_TS
+        * Set current measured speed.
         */
         */
-       READ_8254(val);                     /* read clock */
-       if ( val > clks_per_int_99 ) {
-           outb( 0x0a, 0x20 );             /* see if interrupt pending */
-           if ( inb( 0x20 ) & 1 )
-               itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */
-       }
-       itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ;
-       if ( itime.tv_nsec < last_ival ) {
-           if (rtc_print_lost_tick)
-               printf( "rtclock: missed clock interrupt.\n" );
-       }
-       last_ival = itime.tv_nsec;
-       cur_time->tv_sec = rtclock.time.tv_sec;
-       cur_time->tv_nsec = rtclock.time.tv_nsec;
-       ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime));
+        if (cycles >= 0x100000000ULL) {
+            gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL;
+        } else {
+            gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles;
+        }
+        gPEClockFrequencyInfo.cpu_frequency_hz = cycles;
 
 
-       simple_unlock(&rtclock.lock);
+       kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, rtc_cyc_per_sec);
+       return(cycles);
 }
 
 }
 
-// utility routine 
-// Code to calculate how many processor cycles are in a second...
-
-static void
-rtc_set_cyc_per_sec() 
+void
+clock_get_system_microtime(
+       uint32_t                        *secs,
+       uint32_t                        *microsecs)
 {
 {
+       mach_timespec_t         now;
 
 
-        int     x, y;
-        uint64_t cycles;
-        uint32_t   c[15];          // array for holding sampled cycle counts
-        mach_timespec_t tst[15];  // array for holding time values. NOTE for some reason tv_sec not work
+       (void) sysclk_gettime_internal(&now);
 
 
-        for (x=0; x<15; x++) {  // quick sample 15 times
-                tst[x].tv_sec = 0;
-                tst[x].tv_nsec = 0;
-                sysclk_gettime_internal(&tst[x]);
-               rdtsc_hilo(&y, &c[x]);
-        }
-        y = 0;
-        cycles = 0;
-        for (x=0; x<14; x++) {
-          // simple formula really. calculate the numerator as the number of elapsed processor
-          // cycles * 1000 to adjust for the resolution we want. The denominator is the
-          // elapsed "real" time in nano-seconds. The result will be the processor speed in  
-          // Mhz. any overflows will be discarded before they are added
-          if ((c[x+1] > c[x]) && (tst[x+1].tv_nsec > tst[x].tv_nsec)) {
-                cycles += ((uint64_t)(c[x+1]-c[x]) * NSEC_PER_SEC ) / (uint64_t)(tst[x+1].tv_nsec - tst[x].tv_nsec);       // elapsed nsecs
-                y +=1;
-          }
-        }
-        if (y>0) { // we got more than 1 valid sample. This also takes care of the case of if the clock isn't running
-          cycles = cycles / y;    // calc our average
-        }
-       rtc_cyc_per_sec = cycles;
-       rdtsc_hilo(&rtc_last_int_tsc_hi, &rtc_last_int_tsc_lo);
+       *secs = now.tv_sec;
+       *microsecs = now.tv_nsec / NSEC_PER_USEC;
 }
 
 }
 
-static
-natural_t
-get_uptime_cycles(void)
+void
+clock_get_system_nanotime(
+       uint32_t                        *secs,
+       uint32_t                        *nanosecs)
 {
 {
-        // get the time since the last interupt based on the processors TSC ignoring the
-        // RTC for speed
-        uint32_t   a,d,intermediate_lo,intermediate_hi,result;
-        uint64_t   newTime;
-        
-       rdtsc_hilo(&d, &a);
-        if (d != rtc_last_int_tsc_hi) {
-         newTime = d-rtc_last_int_tsc_hi;
-          newTime = (newTime<<32) + (a-rtc_last_int_tsc_lo);
-          result = newTime;
-        } else {
-          result = a-rtc_last_int_tsc_lo;
-        }
-        __asm__ volatile ( " mul %3 ": "=eax" (intermediate_lo), "=edx" (intermediate_hi): "a"(result), "d"(NSEC_PER_SEC) );
-        __asm__ volatile ( " div %3": "=eax" (result): "eax"(intermediate_lo), "edx" (intermediate_hi), "ecx" (rtc_cyc_per_sec) );
-        return result;
-}
+       mach_timespec_t         now;
+
+       (void) sysclk_gettime_internal(&now);
 
 
+       *secs = now.tv_sec;
+       *nanosecs = now.tv_nsec;
+}
 
 /*
  * Get clock device attributes.
 
 /*
  * Get clock device attributes.
@@ -571,31 +1017,18 @@ sysclk_getattr(
        clock_attr_t            attr,           /* OUT */
        mach_msg_type_number_t  *count)         /* IN/OUT */
 {
        clock_attr_t            attr,           /* OUT */
        mach_msg_type_number_t  *count)         /* IN/OUT */
 {
-       spl_t   s;
-
        if (*count != 1)
                return (KERN_FAILURE);
        switch (flavor) {
 
        case CLOCK_GET_TIME_RES:        /* >0 res */
        if (*count != 1)
                return (KERN_FAILURE);
        switch (flavor) {
 
        case CLOCK_GET_TIME_RES:        /* >0 res */
-#if    (NCPUS == 1 || (MP_V1_1 && 0))
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = 1000;
-               UNLOCK_RTC(s);
-               break;
-#endif /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */
-       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = rtclock.intr_nsec;
-               UNLOCK_RTC(s);
+               *(clock_res_t *) attr = rtc_intr_nsec;
                break;
 
                break;
 
+       case CLOCK_ALARM_CURRES:        /* =0 no alarm */
        case CLOCK_ALARM_MAXRES:
        case CLOCK_ALARM_MAXRES:
-               *(clock_res_t *) attr = RTC_MAXRES;
-               break;
-
        case CLOCK_ALARM_MINRES:
        case CLOCK_ALARM_MINRES:
-               *(clock_res_t *) attr = RTC_MINRES;
+               *(clock_res_t *) attr = 0;
                break;
 
        default:
                break;
 
        default:
@@ -604,60 +1037,6 @@ sysclk_getattr(
        return (KERN_SUCCESS);
 }
 
        return (KERN_SUCCESS);
 }
 
-/*
- * Set clock device attributes.
- */
-kern_return_t
-sysclk_setattr(
-       clock_flavor_t          flavor,
-       clock_attr_t            attr,           /* IN */
-       mach_msg_type_number_t  count)          /* IN */
-{
-       spl_t           s;
-       int             freq;
-       int             adj;
-       clock_res_t     new_ires;
-
-       if (count != 1)
-               return (KERN_FAILURE);
-       switch (flavor) {
-
-       case CLOCK_GET_TIME_RES:
-       case CLOCK_ALARM_MAXRES:
-       case CLOCK_ALARM_MINRES:
-               return (KERN_FAILURE);
-
-       case CLOCK_ALARM_CURRES:
-               new_ires = *(clock_res_t *) attr;
-
-               /*
-                * The new resolution must be within the predetermined
-                * range.  If the desired resolution cannot be achieved
-                * to within 0.1%, an error is returned.
-                */
-               if (new_ires < RTC_MAXRES || new_ires > RTC_MINRES)
-                       return (KERN_INVALID_VALUE);
-               freq = (NSEC_PER_SEC / new_ires);
-               adj = (((clknum % freq) * new_ires) / clknum);
-               if (adj > (new_ires / 1000))
-                       return (KERN_INVALID_VALUE);
-               /*
-                * Record the new alarm resolution which will take effect
-                * on the next HZ aligned clock tick.
-                */
-               LOCK_RTC(s);
-               if ( freq != rtc_intr_freq ) {
-                   rtclock.new_ires = new_ires;
-                   new_clknum = clknum;
-               }
-               UNLOCK_RTC(s);
-               return (KERN_SUCCESS);
-
-       default:
-               return (KERN_INVALID_VALUE);
-       }
-}
-
 /*
  * Set next alarm time for the clock device. This call
  * always resets the time to deliver an alarm for the
 /*
  * Set next alarm time for the clock device. This call
  * always resets the time to deliver an alarm for the
@@ -667,12 +1046,9 @@ void
 sysclk_setalarm(
        mach_timespec_t *alarm_time)
 {
 sysclk_setalarm(
        mach_timespec_t *alarm_time)
 {
-       spl_t           s;
-
-       LOCK_RTC(s);
-       rtclock.alarm_time = *alarm_time;
-       RtcAlrm = &rtclock.alarm_time;
-       UNLOCK_RTC(s);
+       timer_call_enter(&rtclock_alarm_timer,
+                        (uint64_t) alarm_time->tv_sec * NSEC_PER_SEC
+                               + alarm_time->tv_nsec);
 }
 
 /*
 }
 
 /*
@@ -702,39 +1078,69 @@ calend_gettime(
 {
        spl_t           s;
 
 {
        spl_t           s;
 
-       LOCK_RTC(s);
+       RTC_LOCK(s);
        if (!rtclock.calend_is_set) {
        if (!rtclock.calend_is_set) {
-               UNLOCK_RTC(s);
+               RTC_UNLOCK(s);
                return (KERN_FAILURE);
        }
 
        (void) sysclk_gettime_internal(cur_time);
        ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset);
                return (KERN_FAILURE);
        }
 
        (void) sysclk_gettime_internal(cur_time);
        ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset);
-       UNLOCK_RTC(s);
+       RTC_UNLOCK(s);
 
        return (KERN_SUCCESS);
 }
 
 
        return (KERN_SUCCESS);
 }
 
-/*
- * Set the current clock time.
- */
-kern_return_t
-calend_settime(
-       mach_timespec_t *new_time)
+void
+clock_get_calendar_microtime(
+       uint32_t                        *secs,
+       uint32_t                        *microsecs)
+{
+       mach_timespec_t         now;
+
+       calend_gettime(&now);
+
+       *secs = now.tv_sec;
+       *microsecs = now.tv_nsec / NSEC_PER_USEC;
+}
+
+void
+clock_get_calendar_nanotime(
+       uint32_t                        *secs,
+       uint32_t                        *nanosecs)
 {
 {
-       mach_timespec_t curr_time;
+       mach_timespec_t         now;
+
+       calend_gettime(&now);
+
+       *secs = now.tv_sec;
+       *nanosecs = now.tv_nsec;
+}
+
+void
+clock_set_calendar_microtime(
+       uint32_t                        secs,
+       uint32_t                        microsecs)
+{
+       mach_timespec_t         new_time, curr_time;
+       uint32_t                        old_offset;
        spl_t           s;
 
        spl_t           s;
 
-       LOCK_RTC(s);
+       new_time.tv_sec = secs;
+       new_time.tv_nsec = microsecs * NSEC_PER_USEC;
+
+       RTC_LOCK(s);
+       old_offset = rtclock.calend_offset.tv_sec;
        (void) sysclk_gettime_internal(&curr_time);
        (void) sysclk_gettime_internal(&curr_time);
-       rtclock.calend_offset = *new_time;
+       rtclock.calend_offset = new_time;
        SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
        SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
+       rtclock.boottime += rtclock.calend_offset.tv_sec - old_offset;
        rtclock.calend_is_set = TRUE;
        rtclock.calend_is_set = TRUE;
-       UNLOCK_RTC(s);
+       RTC_UNLOCK(s);
 
 
-       (void) bbc_settime(new_time);
+       (void) bbc_settime(&new_time);
 
 
-       return (KERN_SUCCESS);
+       host_notify_calendar_change();
 }
 
 /*
 }
 
 /*
@@ -746,24 +1152,13 @@ calend_getattr(
        clock_attr_t            attr,           /* OUT */
        mach_msg_type_number_t  *count)         /* IN/OUT */
 {
        clock_attr_t            attr,           /* OUT */
        mach_msg_type_number_t  *count)         /* IN/OUT */
 {
-       spl_t   s;
-
        if (*count != 1)
                return (KERN_FAILURE);
        switch (flavor) {
 
        case CLOCK_GET_TIME_RES:        /* >0 res */
        if (*count != 1)
                return (KERN_FAILURE);
        switch (flavor) {
 
        case CLOCK_GET_TIME_RES:        /* >0 res */
-#if    (NCPUS == 1 || (MP_V1_1 && 0))
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = 1000;
-               UNLOCK_RTC(s);
-               break;
-#else  /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */
-               LOCK_RTC(s);
-               *(clock_res_t *) attr = rtclock.intr_nsec;
-               UNLOCK_RTC(s);
+               *(clock_res_t *) attr = rtc_intr_nsec;
                break;
                break;
-#endif /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */
 
        case CLOCK_ALARM_CURRES:        /* =0 no alarm */
        case CLOCK_ALARM_MINRES:
 
        case CLOCK_ALARM_CURRES:        /* =0 no alarm */
        case CLOCK_ALARM_MINRES:
@@ -777,16 +1172,89 @@ calend_getattr(
        return (KERN_SUCCESS);
 }
 
        return (KERN_SUCCESS);
 }
 
-void
-clock_adjust_calendar(
-       clock_res_t     nsec)
+#define tickadj                (40*NSEC_PER_USEC)      /* "standard" skew, ns / tick */
+#define        bigadj          (NSEC_PER_SEC)          /* use 10x skew above bigadj ns */
+
+uint32_t
+clock_set_calendar_adjtime(
+       int32_t                         *secs,
+       int32_t                         *microsecs)
 {
 {
-       spl_t           s;
+       int64_t                 total, ototal;
+       uint32_t                interval = 0;
+       spl_t                   s;
+
+       total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC;
+
+       RTC_LOCK(s);
+       ototal = rtclock.calend_adjtotal;
 
 
-       LOCK_RTC(s);
-       if (rtclock.calend_is_set)
-               ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, nsec);
-       UNLOCK_RTC(s);
+       if (total != 0) {
+               int32_t         delta = tickadj;
+
+               if (total > 0) {
+                       if (total > bigadj)
+                               delta *= 10;
+                       if (delta > total)
+                               delta = total;
+               }
+               else {
+                       if (total < -bigadj)
+                               delta *= 10;
+                       delta = -delta;
+                       if (delta < total)
+                               delta = total;
+               }
+
+               rtclock.calend_adjtotal = total;
+               rtclock.calend_adjdelta = delta;
+
+               interval = NSEC_PER_HZ;
+       }
+       else
+               rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0;
+
+       RTC_UNLOCK(s);
+
+       if (ototal == 0)
+               *secs = *microsecs = 0;
+       else {
+               *secs = ototal / NSEC_PER_SEC;
+               *microsecs = ototal % NSEC_PER_SEC;
+       }
+
+       return (interval);
+}
+
+uint32_t
+clock_adjust_calendar(void)
+{
+       uint32_t                interval = 0;
+       int32_t                 delta;
+       spl_t                   s;
+
+       RTC_LOCK(s);
+       delta = rtclock.calend_adjdelta;
+       ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta);
+
+       rtclock.calend_adjtotal -= delta;
+
+       if (delta > 0) {
+               if (delta > rtclock.calend_adjtotal)
+                       rtclock.calend_adjdelta = rtclock.calend_adjtotal;
+       }
+       else
+       if (delta < 0) {
+               if (delta < rtclock.calend_adjtotal)
+                       rtclock.calend_adjdelta = rtclock.calend_adjtotal;
+       }
+
+       if (rtclock.calend_adjdelta != 0)
+               interval = NSEC_PER_HZ;
+
+       RTC_UNLOCK(s);
+
+       return (interval);
 }
 
 void
 }
 
 void
@@ -798,198 +1266,176 @@ clock_initialize_calendar(void)
        if (bbc_gettime(&bbc_time) != KERN_SUCCESS)
                return;
 
        if (bbc_gettime(&bbc_time) != KERN_SUCCESS)
                return;
 
-       LOCK_RTC(s);
-       if (!rtclock.calend_is_set) {
-               (void) sysclk_gettime_internal(&curr_time);
-               rtclock.calend_offset = bbc_time;
-               SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-               rtclock.calend_is_set = TRUE;
-       }
-       UNLOCK_RTC(s);
+       RTC_LOCK(s);
+       if (rtclock.boottime == 0)
+               rtclock.boottime = bbc_time.tv_sec;
+       (void) sysclk_gettime_internal(&curr_time);
+       rtclock.calend_offset = bbc_time;
+       SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
+       rtclock.calend_is_set = TRUE;
+       RTC_UNLOCK(s);
+
+       host_notify_calendar_change();
 }
 
 }
 
-mach_timespec_t
-clock_get_calendar_offset(void)
+void
+clock_get_boottime_nanotime(
+       uint32_t                        *secs,
+       uint32_t                        *nanosecs)
 {
 {
-       mach_timespec_t result = MACH_TIMESPEC_ZERO;
-       spl_t           s;
-
-       LOCK_RTC(s);
-       if (rtclock.calend_is_set)
-               result = rtclock.calend_offset;
-       UNLOCK_RTC(s);
-
-       return (result);
+       *secs = rtclock.boottime;
+       *nanosecs = 0;
 }
 
 void
 clock_timebase_info(
        mach_timebase_info_t    info)
 {
 }
 
 void
 clock_timebase_info(
        mach_timebase_info_t    info)
 {
-       spl_t   s;
-
-       LOCK_RTC(s);
-       info->numer = info->denom = 1;
-       UNLOCK_RTC(s);
+       info->numer = info->denom =  1;
 }      
 
 void
 clock_set_timer_deadline(
        uint64_t                        deadline)
 {
 }      
 
 void
 clock_set_timer_deadline(
        uint64_t                        deadline)
 {
-       spl_t                   s;
-
-       LOCK_RTC(s);
-       rtclock.timer_deadline = deadline;
-       rtclock.timer_is_set = TRUE;
-       UNLOCK_RTC(s);
+       spl_t           s;
+       cpu_data_t      *pp = current_cpu_datap();
+       rtclock_timer_t *mytimer = &pp->cpu_rtc_timer;
+       uint64_t        abstime;
+       uint64_t        decr;
+
+       assert(get_preemption_level() > 0);
+       assert(rtclock_timer_expire);
+
+       RTC_INTRS_OFF(s);
+       mytimer->deadline = deadline;
+       mytimer->is_set = TRUE;
+       if (!mytimer->has_expired) {
+               abstime = mach_absolute_time();
+               if (mytimer->deadline < pp->cpu_rtc_tick_deadline) {
+                       decr = deadline_to_decrementer(mytimer->deadline,
+                                                      abstime);
+                       rtc_lapic_set_timer(decr);
+                       pp->cpu_rtc_intr_deadline = mytimer->deadline;
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) |
+                                       DBG_FUNC_NONE, decr, 2, 0, 0, 0);
+               }
+       }
+       RTC_INTRS_ON(s);
 }
 
 void
 clock_set_timer_func(
        clock_timer_func_t              func)
 {
 }
 
 void
 clock_set_timer_func(
        clock_timer_func_t              func)
 {
-       spl_t           s;
-
-       LOCK_RTC(s);
-       if (rtclock.timer_expire == NULL)
-               rtclock.timer_expire = func;
-       UNLOCK_RTC(s);
+       if (rtclock_timer_expire == NULL)
+               rtclock_timer_expire = func;
 }
 
 }
 
-\f
-
 /*
 /*
- * Load the count register and start the clock.
+ * Real-time clock device interrupt.
  */
  */
-#define RTCLOCK_RESET()        {                                       \
-       outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE);    \
-       outb(PITCTR0_PORT, (clks_per_int & 0xff));              \
-       outb(PITCTR0_PORT, (clks_per_int >> 8));                \
-}
-
-/*
- * Reset the clock device. This causes the realtime clock
- * device to reload its mode and count value (frequency).
- * Note: the CPU should be calibrated
- * before starting the clock for the first time.
- */
-
 void
 void
-rtclock_reset(void)
+rtclock_intr(struct i386_interrupt_state *regs)
 {
 {
-       int             s;
+       uint64_t        abstime;
+       uint32_t        latency;
+       uint64_t        decr;
+       uint64_t        decr_tick;
+       uint64_t        decr_timer;
+       cpu_data_t      *pp = current_cpu_datap();
+       rtclock_timer_t *mytimer = &pp->cpu_rtc_timer;
+
+       assert(get_preemption_level() > 0);
+       assert(!ml_get_interrupts_enabled());
+
+        abstime = _rtc_nanotime_read();
+       latency = (uint32_t) abstime - pp->cpu_rtc_intr_deadline;
+       if (pp->cpu_rtc_tick_deadline <= abstime) {
+               rtc_nanotime_update();
+               clock_deadline_for_periodic_event(
+                       NSEC_PER_HZ, abstime, &pp->cpu_rtc_tick_deadline);
+               hertz_tick(
+#if STAT_TIME
+                          NSEC_PER_HZ,
+#endif
+                          (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0),
+                          regs->eip);
+       }
 
 
-#if    NCPUS > 1 && !(MP_V1_1 && 0)
-       mp_disable_preemption();
-       if (cpu_number() != master_cpu) {
-               mp_enable_preemption();
-               return;
+       abstime = _rtc_nanotime_read();
+       if (mytimer->is_set && mytimer->deadline <= abstime) {
+               mytimer->has_expired = TRUE;
+               mytimer->is_set = FALSE;
+               (*rtclock_timer_expire)(abstime);
+               assert(!ml_get_interrupts_enabled());
+               mytimer->has_expired = FALSE;
        }
        }
-       mp_enable_preemption();
-#endif /* NCPUS > 1 && AT386 && !MP_V1_1 */
-       LOCK_RTC(s);
-       RTCLOCK_RESET();
-       UNLOCK_RTC(s);
-}
 
 
-/*
- * Real-time clock device interrupt. Called only on the
- * master processor. Updates the clock time and upcalls
- * into the higher level clock code to deliver alarms.
- */
-int
-rtclock_intr(void)
-{
-       uint64_t                abstime;
-       mach_timespec_t clock_time;
-       int                             i;
-       spl_t                   s;
+       /* Log the interrupt service latency (-ve value expected by tool) */
+       KERNEL_DEBUG_CONSTANT(
+               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE,
+               -latency, (uint32_t)regs->eip, 0, 0, 0);
 
 
-       /*
-        * Update clock time. Do the update so that the macro
-        * MTS_TO_TS() for reading the mapped time works (e.g.
-        * update in order: mtv_csec, mtv_time.tv_nsec, mtv_time.tv_sec).
-        */      
-       LOCK_RTC(s);
-       rdtsc_hilo(&rtc_last_int_tsc_hi, &rtc_last_int_tsc_lo);
-       i = rtclock.time.tv_nsec + rtclock.intr_nsec;
-       if (i < NSEC_PER_SEC)
-           rtclock.time.tv_nsec = i;
-       else {
-           rtclock.time.tv_nsec = i - NSEC_PER_SEC;
-           rtclock.time.tv_sec++;
-       }
-       /* note time now up to date */
-       last_ival = 0;
+       abstime = _rtc_nanotime_read();
+       decr_tick = deadline_to_decrementer(pp->cpu_rtc_tick_deadline, abstime);
+       decr_timer = (mytimer->is_set) ?
+                       deadline_to_decrementer(mytimer->deadline, abstime) :
+                       DECREMENTER_MAX;
+       decr = MIN(decr_tick, decr_timer);
+       pp->cpu_rtc_intr_deadline = abstime + decr;
 
 
-       rtclock.abstime += rtclock.intr_nsec;
-       abstime = rtclock.abstime;
-       if (    rtclock.timer_is_set                            &&
-                       rtclock.timer_deadline <= abstime               ) {
-               rtclock.timer_is_set = FALSE;
-               UNLOCK_RTC(s);
+       rtc_lapic_set_timer(decr);
 
 
-               (*rtclock.timer_expire)(abstime);
+       /* Log the new decrementer value */
+       KERNEL_DEBUG_CONSTANT(
+               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE,
+               decr, 3, 0, 0, 0);
 
 
-               LOCK_RTC(s);
-       }
+}
 
 
-       /*
-        * Perform alarm clock processing if needed. The time
-        * passed up is incremented by a half-interrupt tick
-        * to trigger alarms closest to their desired times.
-        * The clock_alarm_intr() routine calls sysclk_setalrm()
-        * before returning if later alarms are pending.
-        */
+static void
+rtclock_alarm_expire(
+       __unused timer_call_param_t     p0,
+       __unused timer_call_param_t     p1)
+{
+       mach_timespec_t clock_time;
 
 
-       if (RtcAlrm && (RtcAlrm->tv_sec < RtcTime->tv_sec ||
-                       (RtcAlrm->tv_sec == RtcTime->tv_sec &&
-                        RtcDelt >= RtcAlrm->tv_nsec - RtcTime->tv_nsec))) {
-               clock_time.tv_sec = 0;
-               clock_time.tv_nsec = RtcDelt;
-               ADD_MACH_TIMESPEC (&clock_time, RtcTime);
-               RtcAlrm = 0;
-               UNLOCK_RTC(s);
-               /*
-                * Call clock_alarm_intr() without RTC-lock.
-                * The lock ordering is always CLOCK-lock
-                * before RTC-lock.
-                */
-               clock_alarm_intr(SYSTEM_CLOCK, &clock_time);
-               LOCK_RTC(s);
-       }
+       (void) sysclk_gettime_internal(&clock_time);
 
 
-       /*
-        * On a HZ-tick boundary: return 0 and adjust the clock
-        * alarm resolution (if requested).  Otherwise return a
-        * non-zero value.
-        */
-       if ((i = --rtc_intr_count) == 0) {
-           if (rtclock.new_ires) {
-                       rtc_setvals(new_clknum, rtclock.new_ires);
-                       RTCLOCK_RESET();            /* lock clock register */
-                       rtclock.new_ires = 0;
-           }
-           rtc_intr_count = rtc_intr_hertz;
-       }
-       UNLOCK_RTC(s);
-       return (i);
+       clock_alarm_intr(SYSTEM_CLOCK, &clock_time);
 }
 
 void
 clock_get_uptime(
        uint64_t                *result)
 {
 }
 
 void
 clock_get_uptime(
        uint64_t                *result)
 {
-       uint32_t                ticks;
-       spl_t                   s;
+        *result = rtc_nanotime_read();
+}
 
 
-       LOCK_RTC(s);
-       ticks = get_uptime_cycles();
-       *result = rtclock.abstime;
-       UNLOCK_RTC(s);
+uint64_t
+mach_absolute_time(void)
+{
+        return rtc_nanotime_read();
+}
 
 
-       *result += ticks;
+void
+absolutetime_to_microtime(
+       uint64_t                        abstime,
+       uint32_t                        *secs,
+       uint32_t                        *microsecs)
+{
+       uint32_t        remain;
+
+       asm volatile(
+                       "divl %3"
+                               : "=a" (*secs), "=d" (remain)
+                               : "A" (abstime), "r" (NSEC_PER_SEC));
+       asm volatile(
+                       "divl %3"
+                               : "=a" (*microsecs)
+                               : "0" (remain), "d" (0), "r" (NSEC_PER_USEC));
 }
 
 void
 }
 
 void
@@ -1042,87 +1488,14 @@ nanoseconds_to_absolutetime(
        *result = nanoseconds;
 }
 
        *result = nanoseconds;
 }
 
-/*
- * measure_delay(microseconds)
- *
- * Measure elapsed time for delay calls
- * Returns microseconds.
- * 
- * Microseconds must not be too large since the counter (short) 
- * will roll over.  Max is about 13 ms.  Values smaller than 1 ms are ok.
- * This uses the assumed frequency of the rt clock which is emperically
- * accurate to only about 200 ppm.
- */
-
-int
-measure_delay(
-       int us)
-{
-       unsigned int    lsb, val;
-
-       outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE);
-       outb(PITCTR0_PORT, 0xff);       /* set counter to max value */
-       outb(PITCTR0_PORT, 0xff);
-       delay(us);
-       outb(PITCTL_PORT, PIT_C0);
-       lsb = inb(PITCTR0_PORT);
-       val = (inb(PITCTR0_PORT) << 8) | lsb;
-       val = 0xffff - val;
-       val *= 1000000;
-       val /= CLKNUM;
-       return(val);
-}
-
-/*
- * calibrate_delay(void)
- *
- * Adjust delaycount.  Called from startup before clock is started
- * for normal interrupt generation.
- */
-
 void
 void
-calibrate_delay(void)
+machine_delay_until(
+       uint64_t                deadline)
 {
 {
-       unsigned        val;
-       int             prev = 0;
-       register int    i;
-
-       printf("adjusting delay count: %d", delaycount);
-       for (i=0; i<10; i++) {
-               prev = delaycount;
-               /* 
-                * microdata must not be too large since measure_timer
-                * will not return accurate values if the counter (short) 
-                * rolls over
-                */
-               val = measure_delay(microdata);
-               if (val == 0) {
-                 delaycount *= 2;
-               } else {
-               delaycount *= microdata;
-               delaycount += val-1;    /* round up to upper us */
-               delaycount /= val;
-               }
-               if (delaycount <= 0)
-                       delaycount = 1;
-               if (delaycount != prev)
-                       printf(" %d", delaycount);
-       }
-       printf("\n");
-}
+       uint64_t                now;
 
 
-#if    MACH_KDB
-void
-test_delay(void);
-
-void
-test_delay(void)
-{
-       register i;
-
-       for (i = 0; i < 10; i++)
-               printf("%d, %d\n", i, measure_delay(i));
-       for (i = 10; i <= 100; i+=10)
-               printf("%d, %d\n", i, measure_delay(i));
+       do {
+               cpu_pause();
+               now = mach_absolute_time();
+       } while (now < deadline);
 }
 }
-#endif /* MACH_KDB */