X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/91447636331957f3d9b5ca5b508f07c526b0074d..eb6b6ca394357805f2bdba989abae309f718b4d8:/osfmk/i386/rtclock.c diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index d6368afa6..bc6fa6524 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -33,8 +39,6 @@ * the cpu clock counted by the timestamp MSR. */ -#include -#include #include @@ -46,824 +50,221 @@ #include #include #include +#include #include #include -#include /* for kernel_map */ -#include -#include -#include -#include -#include +#include /* for kernel_map */ +#include #include -#include #include -#include #include -#include +#include #include -#include +#include +#include +#include #include #include #include #include +#include +#include +#define UI_CPUFREQ_ROUNDING_FACTOR 10000000 -#define MAX(a,b) (((a)>(b))?(a):(b)) -#define MIN(a,b) (((a)>(b))?(b):(a)) - -#define NSEC_PER_HZ (NSEC_PER_SEC / 100) /* nsec per tick */ - -#define UI_CPUFREQ_ROUNDING_FACTOR 10000000 - -int sysclk_config(void); - -int sysclk_init(void); - -kern_return_t sysclk_gettime( - mach_timespec_t *cur_time); - -kern_return_t sysclk_getattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t *count); - -void sysclk_setalarm( - mach_timespec_t *alarm_time); - -/* - * Lists of clock routines. 
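- * The machine-independent clock layer dispatches through these tables;
- * roughly, a clock_get_time() on the system clock ends up calling
- * sysclk_gettime() through the gettime slot of sysclk_ops below.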
- */ -struct clock_ops sysclk_ops = { - sysclk_config, sysclk_init, - sysclk_gettime, 0, - sysclk_getattr, 0, - sysclk_setalarm, -}; - -int calend_config(void); - -int calend_init(void); - -kern_return_t calend_gettime( - mach_timespec_t *cur_time); - -kern_return_t calend_getattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t *count); - -struct clock_ops calend_ops = { - calend_config, calend_init, - calend_gettime, 0, - calend_getattr, 0, - 0, -}; - -/* local data declarations */ - -static clock_timer_func_t rtclock_timer_expire; - -static timer_call_data_t rtclock_alarm_timer; - -static void rtclock_alarm_expire( - timer_call_param_t p0, - timer_call_param_t p1); - -struct { - mach_timespec_t calend_offset; - boolean_t calend_is_set; - - int64_t calend_adjtotal; - int32_t calend_adjdelta; - - uint32_t boottime; - - mach_timebase_info_data_t timebase_const; - - decl_simple_lock_data(,lock) /* real-time clock device lock */ -} rtclock; - -boolean_t rtc_initialized = FALSE; -clock_res_t rtc_intr_nsec = NSEC_PER_HZ; /* interrupt res */ -uint64_t rtc_cycle_count; /* clocks in 1/20th second */ -uint64_t rtc_cyc_per_sec; /* processor cycles per sec */ -uint32_t rtc_boot_frequency; /* provided by 1st speed-step */ -uint32_t rtc_quant_scale; /* clock to nanos multiplier */ -uint32_t rtc_quant_shift; /* clock to nanos right shift */ -uint64_t rtc_decrementer_min; - -static mach_timebase_info_data_t rtc_lapic_scale; /* nsec to lapic count */ - -/* - * Macros to lock/unlock real-time clock data. - */ -#define RTC_INTRS_OFF(s) \ - (s) = splclock() - -#define RTC_INTRS_ON(s) \ - splx(s) - -#define RTC_LOCK(s) \ -MACRO_BEGIN \ - RTC_INTRS_OFF(s); \ - simple_lock(&rtclock.lock); \ -MACRO_END - -#define RTC_UNLOCK(s) \ -MACRO_BEGIN \ - simple_unlock(&rtclock.lock); \ - RTC_INTRS_ON(s); \ -MACRO_END - -/* - * i8254 control. ** MONUMENT ** - * - * The i8254 is a traditional PC device with some arbitrary characteristics. - * Basically, it is a register that counts at a fixed rate and can be - * programmed to generate an interrupt every N counts. The count rate is - * clknum counts per sec (see pit.h), historically 1193167=14.318MHz/12 - * but the more accurate value is 1193182=14.31818MHz/12. [14.31818 MHz being - * the master crystal oscillator reference frequency since the very first PC.] - * Various constants are computed based on this value, and we calculate - * them at init time for execution efficiency. To obtain sufficient - * accuracy, some of the calculation are most easily done in floating - * point and then converted to int. - * - */ - -/* - * Forward decl. - */ - -static uint64_t rtc_set_cyc_per_sec(uint64_t cycles); -uint64_t rtc_nanotime_read(void); - -/* - * create_mul_quant_GHZ - * create a constant used to multiply the TSC by to convert to nanoseconds. - * This is a 32 bit number and the TSC *MUST* have a frequency higher than - * 1000Mhz for this routine to work. - * - * The theory here is that we know how many TSCs-per-sec the processor runs at. - * Normally to convert this to nanoseconds you would multiply the current - * timestamp by 1000000000 (a billion) then divide by TSCs-per-sec. - * Unfortunatly the TSC is 64 bits which would leave us with 96 bit intermediate - * results from the multiply that must be divided by. 
- * Usually thats - * uint96 = tsc * numer - * nanos = uint96 / denom - * Instead, we create this quant constant and it becomes the numerator, - * the denominator can then be 0x100000000 which makes our division as simple as - * forgetting the lower 32 bits of the result. We can also pass this number to - * user space as the numer and pass 0xFFFFFFFF (RTC_FAST_DENOM) as the denom to - * convert raw counts * to nanos. The difference is so small as to be - * undetectable by anything. - * - * Unfortunatly we can not do this for sub GHZ processors. In this case, all - * we do is pass the CPU speed in raw as the denom and we pass in 1000000000 - * as the numerator. No short cuts allowed - */ -#define RTC_FAST_DENOM 0xFFFFFFFF -inline static uint32_t -create_mul_quant_GHZ(int shift, uint32_t quant) -{ - return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant); -} -/* - * This routine takes a value of raw TSC ticks and applies the passed mul_quant - * generated by create_mul_quant() This is our internal routine for creating - * nanoseconds. - * Since we don't really have uint96_t this routine basically does this.... - * uint96_t intermediate = (*value) * scale - * return (intermediate >> 32) - */ -inline static uint64_t -fast_get_nano_from_abs(uint64_t value, int scale) -{ - asm (" movl %%edx,%%esi \n\t" - " mull %%ecx \n\t" - " movl %%edx,%%edi \n\t" - " movl %%esi,%%eax \n\t" - " mull %%ecx \n\t" - " xorl %%ecx,%%ecx \n\t" - " addl %%edi,%%eax \n\t" - " adcl %%ecx,%%edx " - : "+A" (value) - : "c" (scale) - : "%esi", "%edi"); - return value; -} - -/* - * This routine basically does this... - * ts.tv_sec = nanos / 1000000000; create seconds - * ts.tv_nsec = nanos % 1000000000; create remainder nanos - */ -inline static mach_timespec_t -nanos_to_timespec(uint64_t nanos) -{ - union { - mach_timespec_t ts; - uint64_t u64; - } ret; - ret.u64 = nanos; - asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC)); - return ret.ts; -} - -/* - * The following two routines perform the 96 bit arithmetic we need to - * convert generic absolute<->nanoseconds - * The multiply routine takes a uint64_t and a uint32_t and returns the result - * in a uint32_t[3] array. - * The divide routine takes this uint32_t[3] array and divides it by a uint32_t - * returning a uint64_t - */ -inline static void -longmul(uint64_t *abstime, uint32_t multiplicand, uint32_t *result) -{ - asm volatile( - " pushl %%ebx \n\t" - " movl %%eax,%%ebx \n\t" - " movl (%%eax),%%eax \n\t" - " mull %%ecx \n\t" - " xchg %%eax,%%ebx \n\t" - " pushl %%edx \n\t" - " movl 4(%%eax),%%eax \n\t" - " mull %%ecx \n\t" - " movl %2,%%ecx \n\t" - " movl %%ebx,(%%ecx) \n\t" - " popl %%ebx \n\t" - " addl %%ebx,%%eax \n\t" - " popl %%ebx \n\t" - " movl %%eax,4(%%ecx) \n\t" - " adcl $0,%%edx \n\t" - " movl %%edx,8(%%ecx) // and save it" - : : "a"(abstime), "c"(multiplicand), "m"(result)); - -} - -inline static uint64_t -longdiv(uint32_t *numer, uint32_t denom) -{ - uint64_t result; - asm volatile( - " pushl %%ebx \n\t" - " movl %%eax,%%ebx \n\t" - " movl 8(%%eax),%%edx \n\t" - " movl 4(%%eax),%%eax \n\t" - " divl %%ecx \n\t" - " xchg %%ebx,%%eax \n\t" - " movl (%%eax),%%eax \n\t" - " divl %%ecx \n\t" - " xchg %%ebx,%%edx \n\t" - " popl %%ebx \n\t" - : "=A"(result) : "a"(numer),"c"(denom)); - return result; -} - -/* - * Enable or disable timer 2. - * Port 0x61 controls timer 2: - * bit 0 gates the clock, - * bit 1 gates output to speaker. 
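- * For example, to run the counter without driving the speaker
- * (this is exactly what enable_PIT2() below does):
- *	v = inb(0x61);
- *	outb(0x61, (v & 0xFC) | 0x01);	(gate on, speaker off)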
- */ -inline static void -enable_PIT2(void) -{ - asm volatile( - " inb $0x61,%%al \n\t" - " and $0xFC,%%al \n\t" - " or $1,%%al \n\t" - " outb %%al,$0x61 \n\t" - : : : "%al" ); -} - -inline static void -disable_PIT2(void) -{ - asm volatile( - " inb $0x61,%%al \n\t" - " and $0xFC,%%al \n\t" - " outb %%al,$0x61 \n\t" - : : : "%al" ); -} - -inline static void -set_PIT2(int value) -{ -/* - * First, tell the clock we are going to write 16 bits to the counter - * and enable one-shot mode (command 0xB8 to port 0x43) - * Then write the two bytes into the PIT2 clock register (port 0x42). - * Loop until the value is "realized" in the clock, - * this happens on the next tick. - */ - asm volatile( - " movb $0xB8,%%al \n\t" - " outb %%al,$0x43 \n\t" - " movb %%dl,%%al \n\t" - " outb %%al,$0x42 \n\t" - " movb %%dh,%%al \n\t" - " outb %%al,$0x42 \n" -"1: inb $0x42,%%al \n\t" - " inb $0x42,%%al \n\t" - " cmp %%al,%%dh \n\t" - " jne 1b" - : : "d"(value) : "%al"); -} - -inline static uint64_t -get_PIT2(unsigned int *value) -{ - register uint64_t result; -/* - * This routine first latches the time (command 0x80 to port 0x43), - * then gets the time stamp so we know how long the read will take later. - * Read (from port 0x42) and return the current value of the timer. - */ - asm volatile( - " xorl %%ecx,%%ecx \n\t" - " movb $0x80,%%al \n\t" - " outb %%al,$0x43 \n\t" - " rdtsc \n\t" - " pushl %%eax \n\t" - " inb $0x42,%%al \n\t" - " movb %%al,%%cl \n\t" - " inb $0x42,%%al \n\t" - " movb %%al,%%ch \n\t" - " popl %%eax " - : "=A"(result), "=c"(*value)); - return result; -} - -/* - * timeRDTSC() - * This routine sets up PIT counter 2 to count down 1/20 of a second. - * It pauses until the value is latched in the counter - * and then reads the time stamp counter to return to the caller. - */ -static uint64_t -timeRDTSC(void) -{ - int attempts = 0; - uint64_t latchTime; - uint64_t saveTime,intermediate; - unsigned int timerValue, lastValue; - boolean_t int_enabled; - /* - * Table of correction factors to account for - * - timer counter quantization errors, and - * - undercounts 0..5 - */ -#define SAMPLE_CLKS_EXACT (((double) CLKNUM) / 20.0) -#define SAMPLE_CLKS_INT ((int) CLKNUM / 20) -#define SAMPLE_NSECS (2000000000LL) -#define SAMPLE_MULTIPLIER (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT) -#define ROUND64(x) ((uint64_t)((x) + 0.5)) - uint64_t scale[6] = { - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5)) - }; - - int_enabled = ml_set_interrupts_enabled(FALSE); - -restart: - if (attempts >= 2) - panic("timeRDTSC() calibation failed with %d attempts\n", attempts); - attempts++; - enable_PIT2(); // turn on PIT2 - set_PIT2(0); // reset timer 2 to be zero - latchTime = rdtsc64(); // get the time stamp to time - latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes - set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second - saveTime = rdtsc64(); // now time how long a 20th a second is... - get_PIT2(&lastValue); - get_PIT2(&lastValue); // read twice, first value may be unreliable - do { - intermediate = get_PIT2(&timerValue); - if (timerValue > lastValue) { - printf("Hey we are going backwards! 
%u -> %u, restarting timing\n", - timerValue,lastValue); - set_PIT2(0); - disable_PIT2(); - goto restart; - } - lastValue = timerValue; - } while (timerValue > 5); - kprintf("timerValue %d\n",timerValue); - kprintf("intermediate 0x%016llx\n",intermediate); - kprintf("saveTime 0x%016llx\n",saveTime); - - intermediate -= saveTime; // raw count for about 1/20 second - intermediate *= scale[timerValue]; // rescale measured time spent - intermediate /= SAMPLE_NSECS; // so its exactly 1/20 a second - intermediate += latchTime; // add on our save fudge - - set_PIT2(0); // reset timer 2 to be zero - disable_PIT2(); // turn off PIT 2 - - ml_set_interrupts_enabled(int_enabled); - return intermediate; -} - -static uint64_t -tsc_to_nanoseconds(uint64_t abstime) -{ - uint32_t numer; - uint32_t denom; - uint32_t intermediate[3]; - - numer = rtclock.timebase_const.numer; - denom = rtclock.timebase_const.denom; - if (denom == RTC_FAST_DENOM) { - abstime = fast_get_nano_from_abs(abstime, numer); - } else { - longmul(&abstime, numer, intermediate); - abstime = longdiv(intermediate, denom); - } - return abstime; -} - -inline static mach_timespec_t -tsc_to_timespec(void) -{ - uint64_t currNanos; - currNanos = rtc_nanotime_read(); - return nanos_to_timespec(currNanos); -} - -#define DECREMENTER_MAX UINT_MAX -static uint32_t -deadline_to_decrementer( - uint64_t deadline, - uint64_t now) -{ - uint64_t delta; +int rtclock_init(void); - if (deadline <= now) - return rtc_decrementer_min; - else { - delta = deadline - now; - return MIN(MAX(rtc_decrementer_min,delta),DECREMENTER_MAX); - } -} +uint64_t tsc_rebase_abs_time = 0; -static inline uint64_t -lapic_time_countdown(uint32_t initial_count) -{ - boolean_t state; - uint64_t start_time; - uint64_t stop_time; - lapic_timer_count_t count; - - state = ml_set_interrupts_enabled(FALSE); - lapic_set_timer(FALSE, one_shot, divide_by_1, initial_count); - start_time = rdtsc64(); - do { - lapic_get_timer(NULL, NULL, NULL, &count); - } while (count > 0); - stop_time = rdtsc64(); - ml_set_interrupts_enabled(state); - - return tsc_to_nanoseconds(stop_time - start_time); -} +static void rtc_set_timescale(uint64_t cycles); +static uint64_t rtc_export_speed(uint64_t cycles); -static void -rtc_lapic_timer_calibrate(void) +void +rtc_timer_start(void) { - uint32_t nsecs; - uint64_t countdown; - - if (!(cpuid_features() & CPUID_FEATURE_APIC)) - return; - - /* - * Set the local apic timer counting down to zero without an interrupt. - * Use the timestamp to calculate how long this takes. - */ - nsecs = (uint32_t) lapic_time_countdown(rtc_intr_nsec); - /* - * Compute a countdown ratio for a given time in nanoseconds. - * That is, countdown = time * numer / denom. + * Force a complete re-evaluation of timer deadlines. 
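+	 * Writing EndOfAllTime into the per-cpu rtcDeadline below marks any
+	 * previously programmed timer pop as stale, so that
+	 * timer_resync_deadlines() reprograms the hardware from scratch.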
*/ - countdown = (uint64_t)rtc_intr_nsec * (uint64_t)rtc_intr_nsec / nsecs; - - nsecs = (uint32_t) lapic_time_countdown((uint32_t) countdown); - - rtc_lapic_scale.numer = countdown; - rtc_lapic_scale.denom = nsecs; - - kprintf("rtc_lapic_timer_calibrate() scale: %d/%d\n", - (uint32_t) countdown, nsecs); -} - -static void -rtc_lapic_set_timer( - uint32_t interval) -{ - uint64_t count; - - assert(rtc_lapic_scale.denom); - - count = interval * (uint64_t) rtc_lapic_scale.numer; - count /= rtc_lapic_scale.denom; - - lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count); + x86_lcpu()->rtcDeadline = EndOfAllTime; + timer_resync_deadlines(); } -static void -rtc_lapic_start_ticking(void) +static inline uint32_t +_absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs) { - uint64_t abstime; - uint64_t first_tick; - uint64_t decr; - - abstime = mach_absolute_time(); - first_tick = abstime + NSEC_PER_HZ; - current_cpu_datap()->cpu_rtc_tick_deadline = first_tick; - decr = deadline_to_decrementer(first_tick, abstime); - rtc_lapic_set_timer(decr); + uint32_t remain; + *secs = abstime / (uint64_t)NSEC_PER_SEC; + remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC); + *microsecs = remain / NSEC_PER_USEC; + return remain; } -/* - * Configure the real-time clock device. Return success (1) - * or failure (0). - */ - -int -sysclk_config(void) +static inline void +_absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs) { - - mp_disable_preemption(); - if (cpu_number() != master_cpu) { - mp_enable_preemption(); - return(1); - } - mp_enable_preemption(); - - timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL); - - simple_lock_init(&rtclock.lock, 0); - - return (1); + *secs = abstime / (uint64_t)NSEC_PER_SEC; + *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC); } - /* * Nanotime/mach_absolutime_time * ----------------------------- - * The timestamp counter (tsc) - which counts cpu clock cycles and can be read - * efficient by the kernel and in userspace - is the reference for all timing. - * However, the cpu clock rate is not only platform-dependent but can change - * (speed-step) dynamically. Hence tsc is converted into nanoseconds which is - * identical to mach_absolute_time. The conversion to tsc to nanoseconds is - * encapsulated by nanotime. + * The timestamp counter (TSC) - which counts cpu clock cycles and can be read + * efficiently by the kernel and in userspace - is the reference for all timing. + * The cpu clock rate is platform-dependent and may stop or be reset when the + * processor is napped/slept. As a result, nanotime is the software abstraction + * used to maintain a monotonic clock, adjusted from an outside reference as needed. * * The kernel maintains nanotime information recording: - * - the current ratio of tsc to nanoseconds + * - the ratio of tsc to nanoseconds * with this ratio expressed as a 32-bit scale and shift * (power of 2 divider); - * - the tsc (step_tsc) and nanotime (step_ns) at which the current - * ratio (clock speed) began. - * So a tsc value can be converted to nanotime by: - * - * nanotime = (((tsc - step_tsc)*scale) >> shift) + step_ns + * - { tsc_base, ns_base } pair of corresponding timestamps. * - * In general, (tsc - step_tsc) is a 64-bit quantity with the scaling - * involving a 96-bit intermediate value. 
However, by saving the converted
- * values at each tick (or at any intervening speed-step) - base_tsc and
- * base_ns - we can perform conversions relative to these and be assured that
- * (tsc - tick_tsc) is 32-bits. Hence:
+ *	- { tsc_base, ns_base } pair of corresponding timestamps.
  *
- *	fast_nanotime = (((tsc - base_tsc)*scale) >> shift) + base_ns
- *
- * The tuple {base_tsc, base_ns, scale, shift} is exported in the commpage
- * for the userspace nanotime routine to read. A duplicate check_tsc is
- * appended so that the consistency of the read can be verified. Note that
- * this scheme is essential for MP systems in which the commpage is updated
- * by the master cpu but may be read concurrently by other cpus.
- *
+ * The tuple {tsc_base, ns_base, scale, shift} is exported in the commpage
+ * for the userspace nanotime routine to read.
+ *
+ * All of the routines which update the nanotime data are non-reentrant. This must
+ * be guaranteed by the caller.
  */
 static inline void
-rtc_nanotime_set_commpage(rtc_nanotime_t *rntp)
-{
-	commpage_nanotime_t	cp_nanotime;
-
-	/* Only the master cpu updates the commpage */
-	if (cpu_number() != master_cpu)
-		return;
-
-	cp_nanotime.nt_base_tsc = rntp->rnt_tsc;
-	cp_nanotime.nt_base_ns = rntp->rnt_nanos;
-	cp_nanotime.nt_scale = rntp->rnt_scale;
-	cp_nanotime.nt_shift = rntp->rnt_shift;
-
-	commpage_set_nanotime(&cp_nanotime);
-}
-
-static void
-rtc_nanotime_init(void)
+rtc_nanotime_set_commpage(pal_rtc_nanotime_t *rntp)
 {
-	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-	rtc_nanotime_t	*master_rntp = &cpu_datap(master_cpu)->cpu_rtc_nanotime;
-
-	if (cpu_number() == master_cpu) {
-		rntp->rnt_tsc = rdtsc64();
-		rntp->rnt_nanos = tsc_to_nanoseconds(rntp->rnt_tsc);
-		rntp->rnt_scale = rtc_quant_scale;
-		rntp->rnt_shift = rtc_quant_shift;
-		rntp->rnt_step_tsc = 0ULL;
-		rntp->rnt_step_nanos = 0ULL;
-	} else {
-		/*
-		 * Copy master processor's nanotime info.
-		 * Loop required in case this changes while copying.
-		 */
-		do {
-			*rntp = *master_rntp;
-		} while (rntp->rnt_tsc != master_rntp->rnt_tsc);
-	}
+	commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift);
 }

+/*
+ * rtc_nanotime_init:
+ *
+ * Initialize the nanotime info from the base time.
+ */
 static inline void
-_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t tsc)
+_rtc_nanotime_init(pal_rtc_nanotime_t *rntp, uint64_t base)
 {
-	uint64_t	tsc_delta;
-	uint64_t	ns_delta;
+	uint64_t	tsc = rdtsc64();

-	tsc_delta = tsc - rntp->rnt_step_tsc;
-	ns_delta = tsc_to_nanoseconds(tsc_delta);
-	rntp->rnt_nanos = rntp->rnt_step_nanos + ns_delta;
-	rntp->rnt_tsc = tsc;
+	_pal_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp);
 }

-static void
-rtc_nanotime_update(void)
-{
-	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-
-	assert(get_preemption_level() > 0);
-	assert(!ml_get_interrupts_enabled());
-
-	_rtc_nanotime_update(rntp, rdtsc64());
-	rtc_nanotime_set_commpage(rntp);
-}
-
-static void
-rtc_nanotime_scale_update(void)
+void
+rtc_nanotime_init(uint64_t base)
 {
-	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
-	uint64_t	tsc = rdtsc64();
-
-	assert(!ml_get_interrupts_enabled());
-
-	/*
-	 * Update time based on past scale.
-	 */
-	_rtc_nanotime_update(rntp, tsc);
-
-	/*
-	 * Update scale and timestamp this update.
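-	 * After this re-base, conversions resume from the values just
-	 * computed, i.e. (per the formula in the comment above):
-	 *	nanos = rnt_step_nanos + (((tsc - rnt_step_tsc)*scale) >> shift)
-	 * with scale/shift now reflecting the new cpu frequency.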
- */ - rntp->rnt_scale = rtc_quant_scale; - rntp->rnt_shift = rtc_quant_shift; - rntp->rnt_step_tsc = rntp->rnt_tsc; - rntp->rnt_step_nanos = rntp->rnt_nanos; - - /* Export update to userland */ - rtc_nanotime_set_commpage(rntp); + _rtc_nanotime_init(&pal_rtc_nanotime_info, base); + rtc_nanotime_set_commpage(&pal_rtc_nanotime_info); } -static uint64_t -_rtc_nanotime_read(void) +/* + * rtc_nanotime_init_commpage: + * + * Call back from the commpage initialization to + * cause the commpage data to be filled in once the + * commpages have been created. + */ +void +rtc_nanotime_init_commpage(void) { - rtc_nanotime_t *rntp = ¤t_cpu_datap()->cpu_rtc_nanotime; - uint64_t rnt_tsc; - uint32_t rnt_scale; - uint32_t rnt_shift; - uint64_t rnt_nanos; - uint64_t tsc; - uint64_t tsc_delta; - - rnt_scale = rntp->rnt_scale; - if (rnt_scale == 0) - return 0ULL; - - rnt_shift = rntp->rnt_shift; - rnt_nanos = rntp->rnt_nanos; - rnt_tsc = rntp->rnt_tsc; - tsc = rdtsc64(); + spl_t s = splclock(); - tsc_delta = tsc - rnt_tsc; - if ((tsc_delta >> 32) != 0) - return rnt_nanos + tsc_to_nanoseconds(tsc_delta); - - /* Let the compiler optimize(?): */ - if (rnt_shift == 32) - return rnt_nanos + ((tsc_delta * rnt_scale) >> 32); - else - return rnt_nanos + ((tsc_delta * rnt_scale) >> rnt_shift); + rtc_nanotime_set_commpage(&pal_rtc_nanotime_info); + splx(s); } -uint64_t +/* + * rtc_nanotime_read: + * + * Returns the current nanotime value, accessable from any + * context. + */ +static inline uint64_t rtc_nanotime_read(void) { - uint64_t result; - uint64_t rnt_tsc; - rtc_nanotime_t *rntp = ¤t_cpu_datap()->cpu_rtc_nanotime; - - /* - * Use timestamp to ensure the uptime record isn't changed. - * This avoids disabling interrupts. - * And not this is a per-cpu structure hence no locking. - */ - do { - rnt_tsc = rntp->rnt_tsc; - result = _rtc_nanotime_read(); - } while (rnt_tsc != rntp->rnt_tsc); - - return result; + return _rtc_nanotime_read(&pal_rtc_nanotime_info); } - /* - * This function is called by the speed-step driver when a - * change of cpu clock frequency is about to occur. - * The scale is not changed until rtc_clock_stepped() is called. - * Between these times there is an uncertainty is exactly when - * the change takes effect. FIXME: by using another timing source - * we could eliminate this error. + * rtc_clock_napped: + * + * Invoked from power management when we exit from a low C-State (>= C4) + * and the TSC has stopped counting. The nanotime data is updated according + * to the provided value which represents the new value for nanotime. */ void -rtc_clock_stepping(__unused uint32_t new_frequency, - __unused uint32_t old_frequency) +rtc_clock_napped(uint64_t base, uint64_t tsc_base) { - boolean_t istate; + pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; + uint64_t oldnsecs; + uint64_t newnsecs; + uint64_t tsc; + + assert(!ml_get_interrupts_enabled()); + tsc = rdtsc64(); + oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp); + newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp); - istate = ml_set_interrupts_enabled(FALSE); - rtc_nanotime_scale_update(); - ml_set_interrupts_enabled(istate); + /* + * Only update the base values if time using the new base values + * is later than the time using the old base values. 
+ */ + if (oldnsecs < newnsecs) { + _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp); + rtc_nanotime_set_commpage(rntp); + } } /* - * This function is called by the speed-step driver when a - * change of cpu clock frequency has just occured. This change - * is expressed as a ratio relative to the boot clock rate. + * Invoked from power management to correct the SFLM TSC entry drift problem: + * a small delta is added to the tsc_base. This is equivalent to nudgin time + * backwards. We require this to be on the order of a TSC quantum which won't + * cause callers of mach_absolute_time() to see time going backwards! */ void -rtc_clock_stepped(uint32_t new_frequency, uint32_t old_frequency) +rtc_clock_adjust(uint64_t tsc_base_delta) { - boolean_t istate; + pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; - istate = ml_set_interrupts_enabled(FALSE); - if (rtc_boot_frequency == 0) { - /* - * At the first ever stepping, old frequency is the real - * initial clock rate. This step and all others are based - * relative to this initial frequency at which the tsc - * calibration was made. Hence we must remember this base - * frequency as reference. - */ - rtc_boot_frequency = old_frequency; - } - rtc_set_cyc_per_sec(rtc_cycle_count * new_frequency / - rtc_boot_frequency); - rtc_nanotime_scale_update(); - ml_set_interrupts_enabled(istate); + assert(!ml_get_interrupts_enabled()); + assert(tsc_base_delta < 100ULL); /* i.e. it's small */ + _rtc_nanotime_adjust(tsc_base_delta, rntp); + rtc_nanotime_set_commpage(rntp); } /* - * rtc_sleep_wakeup() is called from acpi on awakening from a S3 sleep + * rtc_sleep_wakeup: + * + * Invoked from power management when we have awoken from a sleep (S3) + * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC + * has progressed. The nanotime data is updated based on the passed-in value. + * + * The caller must guarantee non-reentrancy. */ void -rtc_sleep_wakeup(void) +rtc_sleep_wakeup( + uint64_t base) { - rtc_nanotime_t *rntp = ¤t_cpu_datap()->cpu_rtc_nanotime; - - boolean_t istate; - - istate = ml_set_interrupts_enabled(FALSE); + /* Set fixed configuration for lapic timers */ + rtc_timer->rtc_config(); /* * Reset nanotime. * The timestamp counter will have been reset * but nanotime (uptime) marches onward. - * We assume that we're still at the former cpu frequency. */ - rntp->rnt_tsc = rdtsc64(); - rntp->rnt_step_tsc = 0ULL; - rntp->rnt_step_nanos = rntp->rnt_nanos; - rtc_nanotime_set_commpage(rntp); - - /* Restart tick interrupts from the LAPIC timer */ - rtc_lapic_start_ticking(); + rtc_nanotime_init(base); +} - ml_set_interrupts_enabled(istate); +void +rtc_decrementer_configure(void) +{ + rtc_timer->rtc_config(); +} +/* + * rtclock_early_init() is called very early at boot to + * establish mach_absolute_time() and set it to zero. + */ +void +rtclock_early_init(void) +{ + assert(tscFreq); + rtc_set_timescale(tscFreq); } /* @@ -871,631 +272,263 @@ rtc_sleep_wakeup(void) * In addition, various variables used to support the clock are initialized. */ int -sysclk_init(void) +rtclock_init(void) { - uint64_t cycles; + uint64_t cycles; + + assert(!ml_get_interrupts_enabled()); - mp_disable_preemption(); if (cpu_number() == master_cpu) { + assert(tscFreq); + /* - * Perform calibration. - * The PIT is used as the reference to compute how many - * TCS counts (cpu clock cycles) occur per second. + * Adjust and set the exported cpu speed. 
*/ - rtc_cycle_count = timeRDTSC(); - cycles = rtc_set_cyc_per_sec(rtc_cycle_count); + cycles = rtc_export_speed(tscFreq); /* * Set min/max to actual. * ACPI may update these later if speed-stepping is detected. */ - gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; - gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; - printf("[RTCLOCK] frequency %llu (%llu)\n", - cycles, rtc_cyc_per_sec); - - rtc_lapic_timer_calibrate(); - - /* Minimum interval is 1usec */ - rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC, - 0ULL); - /* Point LAPIC interrupts to hardclock() */ - lapic_set_timer_func((i386_intr_func_t) rtclock_intr); + gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; + gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; + rtc_timer_init(); clock_timebase_init(); - rtc_initialized = TRUE; + ml_init_lock_timeout(); + ml_init_delay_spin_threshold(10); } - rtc_nanotime_init(); - - rtc_lapic_start_ticking(); - - mp_enable_preemption(); + /* Set fixed configuration for lapic timers */ + rtc_timer->rtc_config(); + rtc_timer_start(); - return (1); + return 1; } -/* - * Get the clock device time. This routine is responsible - * for converting the device's machine dependent time value - * into a canonical mach_timespec_t value. - */ -static kern_return_t -sysclk_gettime_internal( - mach_timespec_t *cur_time) /* OUT */ -{ - *cur_time = tsc_to_timespec(); - return (KERN_SUCCESS); -} +// utility routine +// Code to calculate how many processor cycles are in a second... -kern_return_t -sysclk_gettime( - mach_timespec_t *cur_time) /* OUT */ +static void +rtc_set_timescale(uint64_t cycles) { - return sysclk_gettime_internal(cur_time); -} + pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; + uint32_t shift = 0; -void -sysclk_gettime_interrupts_disabled( - mach_timespec_t *cur_time) /* OUT */ -{ - (void) sysclk_gettime_internal(cur_time); -} + /* the "scale" factor will overflow unless cycles>SLOW_TSC_THRESHOLD */ -// utility routine -// Code to calculate how many processor cycles are in a second... + while (cycles <= SLOW_TSC_THRESHOLD) { + shift++; + cycles <<= 1; + } -static uint64_t -rtc_set_cyc_per_sec(uint64_t cycles) -{ + rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles); - if (cycles > (NSEC_PER_SEC/20)) { - // we can use just a "fast" multiply to get nanos - rtc_quant_shift = 32; - rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); - rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20 - rtclock.timebase_const.denom = RTC_FAST_DENOM; - } else { - rtc_quant_shift = 26; - rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); - rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20 - rtclock.timebase_const.denom = cycles; - } - rtc_cyc_per_sec = cycles*20; // multiply it by 20 and we are done.. - // BUT we also want to calculate... - - cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) - / UI_CPUFREQ_ROUNDING_FACTOR) - * UI_CPUFREQ_ROUNDING_FACTOR; + rntp->shift = shift; /* - * Set current measured speed. + * On some platforms, the TSC is not reset at warm boot. But the + * rebase time must be relative to the current boot so we can't use + * mach_absolute_time(). Instead, we convert the TSC delta since boot + * to nanoseconds. 
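+	 * (tsc_at_boot is the TSC value sampled early in startup; the
+	 * conversion uses the scale/shift computed above. For example, a
+	 * 2GHz TSC yields scale == (NSEC_PER_SEC << 32) / 2e9 == 0x80000000
+	 * with shift == 0, so two cycles of delta convert to one nanosecond.)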
*/ - if (cycles >= 0x100000000ULL) { - gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL; - } else { - gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles; - } - gPEClockFrequencyInfo.cpu_frequency_hz = cycles; - - kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, rtc_cyc_per_sec); - return(cycles); -} - -void -clock_get_system_microtime( - uint32_t *secs, - uint32_t *microsecs) -{ - mach_timespec_t now; - - (void) sysclk_gettime_internal(&now); - - *secs = now.tv_sec; - *microsecs = now.tv_nsec / NSEC_PER_USEC; -} - -void -clock_get_system_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - mach_timespec_t now; - - (void) sysclk_gettime_internal(&now); - - *secs = now.tv_sec; - *nanosecs = now.tv_nsec; -} - -/* - * Get clock device attributes. - */ -kern_return_t -sysclk_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - if (*count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - *(clock_res_t *) attr = rtc_intr_nsec; - break; - - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MAXRES: - case CLOCK_ALARM_MINRES: - *(clock_res_t *) attr = 0; - break; - - default: - return (KERN_INVALID_VALUE); + if (tsc_rebase_abs_time == 0) { + tsc_rebase_abs_time = _rtc_tsc_to_nanoseconds( + rdtsc64() - tsc_at_boot, rntp); } - return (KERN_SUCCESS); -} - -/* - * Set next alarm time for the clock device. This call - * always resets the time to deliver an alarm for the - * clock. - */ -void -sysclk_setalarm( - mach_timespec_t *alarm_time) -{ - timer_call_enter(&rtclock_alarm_timer, - (uint64_t) alarm_time->tv_sec * NSEC_PER_SEC - + alarm_time->tv_nsec); -} -/* - * Configure the calendar clock. - */ -int -calend_config(void) -{ - return bbc_config(); + rtc_nanotime_init(0); } -/* - * Initialize calendar clock. - */ -int -calend_init(void) -{ - return (1); -} - -/* - * Get the current clock time. - */ -kern_return_t -calend_gettime( - mach_timespec_t *cur_time) /* OUT */ +static uint64_t +rtc_export_speed(uint64_t cyc_per_sec) { - spl_t s; + pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; + uint64_t cycles; - RTC_LOCK(s); - if (!rtclock.calend_is_set) { - RTC_UNLOCK(s); - return (KERN_FAILURE); + if (rntp->shift != 0) { + printf("Slow TSC, rtc_nanotime.shift == %d\n", rntp->shift); } - (void) sysclk_gettime_internal(cur_time); - ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset); - RTC_UNLOCK(s); - - return (KERN_SUCCESS); -} - -void -clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs) -{ - mach_timespec_t now; + /* Round: */ + cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR / 2)) + / UI_CPUFREQ_ROUNDING_FACTOR) + * UI_CPUFREQ_ROUNDING_FACTOR; - calend_gettime(&now); + /* + * Set current measured speed. 
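	 * cpu_clock_rate_hz is only 32 bits wide, so rates at or above
	 * 0x100000000 Hz (about 4.29 GHz) are clamped to 0xFFFFFFFF below;
	 * the full 64-bit value is still exported in cpu_frequency_hz.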
+ */ + if (cycles >= 0x100000000ULL) { + gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL; + } else { + gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles; + } + gPEClockFrequencyInfo.cpu_frequency_hz = cycles; - *secs = now.tv_sec; - *microsecs = now.tv_nsec / NSEC_PER_USEC; + kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, cyc_per_sec); + return cycles; } void -clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs) +clock_get_system_microtime( + clock_sec_t *secs, + clock_usec_t *microsecs) { - mach_timespec_t now; + uint64_t now = rtc_nanotime_read(); - calend_gettime(&now); - - *secs = now.tv_sec; - *nanosecs = now.tv_nsec; + _absolutetime_to_microtime(now, secs, microsecs); } void -clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs) -{ - mach_timespec_t new_time, curr_time; - uint32_t old_offset; - spl_t s; - - new_time.tv_sec = secs; - new_time.tv_nsec = microsecs * NSEC_PER_USEC; - - RTC_LOCK(s); - old_offset = rtclock.calend_offset.tv_sec; - (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset = new_time; - SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); - rtclock.boottime += rtclock.calend_offset.tv_sec - old_offset; - rtclock.calend_is_set = TRUE; - RTC_UNLOCK(s); - - (void) bbc_settime(&new_time); - - host_notify_calendar_change(); -} - -/* - * Get clock device attributes. - */ -kern_return_t -calend_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - if (*count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - *(clock_res_t *) attr = rtc_intr_nsec; - break; - - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MINRES: - case CLOCK_ALARM_MAXRES: - *(clock_res_t *) attr = 0; - break; - - default: - return (KERN_INVALID_VALUE); - } - return (KERN_SUCCESS); -} - -#define tickadj (40*NSEC_PER_USEC) /* "standard" skew, ns / tick */ -#define bigadj (NSEC_PER_SEC) /* use 10x skew above bigadj ns */ - -uint32_t -clock_set_calendar_adjtime( - int32_t *secs, - int32_t *microsecs) -{ - int64_t total, ototal; - uint32_t interval = 0; - spl_t s; - - total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; - - RTC_LOCK(s); - ototal = rtclock.calend_adjtotal; - - if (total != 0) { - int32_t delta = tickadj; - - if (total > 0) { - if (total > bigadj) - delta *= 10; - if (delta > total) - delta = total; - } - else { - if (total < -bigadj) - delta *= 10; - delta = -delta; - if (delta < total) - delta = total; - } - - rtclock.calend_adjtotal = total; - rtclock.calend_adjdelta = delta; - - interval = NSEC_PER_HZ; - } - else - rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0; - - RTC_UNLOCK(s); - - if (ototal == 0) - *secs = *microsecs = 0; - else { - *secs = ototal / NSEC_PER_SEC; - *microsecs = ototal % NSEC_PER_SEC; - } - - return (interval); -} - -uint32_t -clock_adjust_calendar(void) +clock_get_system_nanotime( + clock_sec_t *secs, + clock_nsec_t *nanosecs) { - uint32_t interval = 0; - int32_t delta; - spl_t s; - - RTC_LOCK(s); - delta = rtclock.calend_adjdelta; - ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta); - - rtclock.calend_adjtotal -= delta; - - if (delta > 0) { - if (delta > rtclock.calend_adjtotal) - rtclock.calend_adjdelta = rtclock.calend_adjtotal; - } - else - if (delta < 0) { - if (delta < rtclock.calend_adjtotal) - rtclock.calend_adjdelta = rtclock.calend_adjtotal; - } - - if (rtclock.calend_adjdelta != 0) - interval = NSEC_PER_HZ; - - RTC_UNLOCK(s); + uint64_t 
now = rtc_nanotime_read(); - return (interval); + _absolutetime_to_nanotime(now, secs, nanosecs); } void -clock_initialize_calendar(void) +clock_gettimeofday_set_commpage(uint64_t abstime, uint64_t sec, uint64_t frac, uint64_t scale, uint64_t tick_per_sec) { - mach_timespec_t bbc_time, curr_time; - spl_t s; - - if (bbc_gettime(&bbc_time) != KERN_SUCCESS) - return; - - RTC_LOCK(s); - if (rtclock.boottime == 0) - rtclock.boottime = bbc_time.tv_sec; - (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset = bbc_time; - SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); - rtclock.calend_is_set = TRUE; - RTC_UNLOCK(s); - - host_notify_calendar_change(); -} - -void -clock_get_boottime_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - *secs = rtclock.boottime; - *nanosecs = 0; + commpage_set_timestamp(abstime, sec, frac, scale, tick_per_sec); } void clock_timebase_info( - mach_timebase_info_t info) + mach_timebase_info_t info) { info->numer = info->denom = 1; -} - -void -clock_set_timer_deadline( - uint64_t deadline) -{ - spl_t s; - cpu_data_t *pp = current_cpu_datap(); - rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; - uint64_t abstime; - uint64_t decr; - - assert(get_preemption_level() > 0); - assert(rtclock_timer_expire); - - RTC_INTRS_OFF(s); - mytimer->deadline = deadline; - mytimer->is_set = TRUE; - if (!mytimer->has_expired) { - abstime = mach_absolute_time(); - if (mytimer->deadline < pp->cpu_rtc_tick_deadline) { - decr = deadline_to_decrementer(mytimer->deadline, - abstime); - rtc_lapic_set_timer(decr); - pp->cpu_rtc_intr_deadline = mytimer->deadline; - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | - DBG_FUNC_NONE, decr, 2, 0, 0, 0); - } - } - RTC_INTRS_ON(s); -} - -void -clock_set_timer_func( - clock_timer_func_t func) -{ - if (rtclock_timer_expire == NULL) - rtclock_timer_expire = func; } /* * Real-time clock device interrupt. */ void -rtclock_intr(struct i386_interrupt_state *regs) +rtclock_intr( + x86_saved_state_t *tregs) { - uint64_t abstime; - uint32_t latency; - uint64_t decr; - uint64_t decr_tick; - uint64_t decr_timer; - cpu_data_t *pp = current_cpu_datap(); - rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; + uint64_t rip; + boolean_t user_mode = FALSE; assert(get_preemption_level() > 0); assert(!ml_get_interrupts_enabled()); - abstime = _rtc_nanotime_read(); - latency = (uint32_t) abstime - pp->cpu_rtc_intr_deadline; - if (pp->cpu_rtc_tick_deadline <= abstime) { - rtc_nanotime_update(); - clock_deadline_for_periodic_event( - NSEC_PER_HZ, abstime, &pp->cpu_rtc_tick_deadline); - hertz_tick( -#if STAT_TIME - NSEC_PER_HZ, -#endif - (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0), - regs->eip); - } - - abstime = _rtc_nanotime_read(); - if (mytimer->is_set && mytimer->deadline <= abstime) { - mytimer->has_expired = TRUE; - mytimer->is_set = FALSE; - (*rtclock_timer_expire)(abstime); - assert(!ml_get_interrupts_enabled()); - mytimer->has_expired = FALSE; - } + if (is_saved_state64(tregs) == TRUE) { + x86_saved_state64_t *regs; - /* Log the interrupt service latency (-ve value expected by tool) */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, - -latency, (uint32_t)regs->eip, 0, 0, 0); + regs = saved_state64(tregs); - abstime = _rtc_nanotime_read(); - decr_tick = deadline_to_decrementer(pp->cpu_rtc_tick_deadline, abstime); - decr_timer = (mytimer->is_set) ? 
- deadline_to_decrementer(mytimer->deadline, abstime) : - DECREMENTER_MAX; - decr = MIN(decr_tick, decr_timer); - pp->cpu_rtc_intr_deadline = abstime + decr; + if (regs->isf.cs & 0x03) { + user_mode = TRUE; + } + rip = regs->isf.rip; + } else { + x86_saved_state32_t *regs; - rtc_lapic_set_timer(decr); + regs = saved_state32(tregs); - /* Log the new decrementer value */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, - decr, 3, 0, 0, 0); + if (regs->cs & 0x03) { + user_mode = TRUE; + } + rip = regs->eip; + } + /* call the generic etimer */ + timer_intr(user_mode, rip); } -static void -rtclock_alarm_expire( - __unused timer_call_param_t p0, - __unused timer_call_param_t p1) + +/* + * Request timer pop from the hardware + */ + +uint64_t +setPop(uint64_t time) { - mach_timespec_t clock_time; + uint64_t now; + uint64_t pop; - (void) sysclk_gettime_internal(&clock_time); + /* 0 and EndOfAllTime are special-cases for "clear the timer" */ + if (time == 0 || time == EndOfAllTime) { + time = EndOfAllTime; + now = 0; + pop = rtc_timer->rtc_set(0, 0); + } else { + now = rtc_nanotime_read(); /* The time in nanoseconds */ + pop = rtc_timer->rtc_set(time, now); + } - clock_alarm_intr(SYSTEM_CLOCK, &clock_time); -} + /* Record requested and actual deadlines set */ + x86_lcpu()->rtcDeadline = time; + x86_lcpu()->rtcPop = pop; -void -clock_get_uptime( - uint64_t *result) -{ - *result = rtc_nanotime_read(); + return pop - now; } uint64_t mach_absolute_time(void) { - return rtc_nanotime_read(); + return rtc_nanotime_read(); } -void -absolutetime_to_microtime( - uint64_t abstime, - uint32_t *secs, - uint32_t *microsecs) +uint64_t +mach_approximate_time(void) { - uint32_t remain; - - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (remain) - : "A" (abstime), "r" (NSEC_PER_SEC)); - asm volatile( - "divl %3" - : "=a" (*microsecs) - : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); + return rtc_nanotime_read(); } void -clock_interval_to_deadline( - uint32_t interval, - uint32_t scale_factor, - uint64_t *result) +clock_interval_to_absolutetime_interval( + uint32_t interval, + uint32_t scale_factor, + uint64_t *result) { - uint64_t abstime; - - clock_get_uptime(result); - - clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); - - *result += abstime; + *result = (uint64_t)interval * scale_factor; } void -clock_interval_to_absolutetime_interval( - uint32_t interval, - uint32_t scale_factor, - uint64_t *result) +absolutetime_to_microtime( + uint64_t abstime, + clock_sec_t *secs, + clock_usec_t *microsecs) { - *result = (uint64_t)interval * scale_factor; + _absolutetime_to_microtime(abstime, secs, microsecs); } void -clock_absolutetime_interval_to_deadline( - uint64_t abstime, - uint64_t *result) +nanotime_to_absolutetime( + clock_sec_t secs, + clock_nsec_t nanosecs, + uint64_t *result) { - clock_get_uptime(result); - - *result += abstime; + *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs; } void absolutetime_to_nanoseconds( - uint64_t abstime, - uint64_t *result) + uint64_t abstime, + uint64_t *result) { *result = abstime; } void nanoseconds_to_absolutetime( - uint64_t nanoseconds, - uint64_t *result) + uint64_t nanoseconds, + uint64_t *result) { *result = nanoseconds; } void machine_delay_until( - uint64_t deadline) + uint64_t interval, + uint64_t deadline) { - uint64_t now; - - do { + (void)interval; + while (mach_absolute_time() < deadline) { cpu_pause(); - now = mach_absolute_time(); - } while (now < deadline); + } }